Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,026 changes: 1,026 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def

Large diffs are not rendered by default.

56 changes: 22 additions & 34 deletions clang/include/clang/Basic/BuiltinsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,6 @@ def __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64 : AMDGPUBuiltin<"double(doub
def __builtin_amdgcn_raw_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">;
def __builtin_amdgcn_struct_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">;

def __builtin_amdgcn_global_load_b128 : AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int address_space<1> *>, char const *)", [], "gfx9-insts">;
def __builtin_amdgcn_global_store_b128 : AMDGPUBuiltin<"void(_ExtVector<4, unsigned int address_space<1> *>, _ExtVector<4, unsigned int>, char const *)", [], "gfx9-insts">;

//===----------------------------------------------------------------------===//
// Ballot builtins.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -383,43 +380,34 @@ def __builtin_amdgcn_endpgm : AMDGPUBuiltin<"void()", [NoReturn]>;
def __builtin_amdgcn_get_fpenv : AMDGPUBuiltin<"uint64_t()">;
def __builtin_amdgcn_set_fpenv : AMDGPUBuiltin<"void(uint64_t)">;

// These are special FE only builtins intended for forwarding the requirements
// to the ME.
def __builtin_amdgcn_processor_is : AMDGPUBuiltin<"__amdgpu_feature_predicate_t(char const *)", [Const, CustomTypeChecking, UnevaluatedArguments]>;
def __builtin_amdgcn_is_invocable : AMDGPUBuiltin<"__amdgpu_feature_predicate_t()", [Const, CustomTypeChecking, UnevaluatedArguments]>;

//===----------------------------------------------------------------------===//

// Wave Reduction builtins.

//===----------------------------------------------------------------------===//

def __builtin_amdgcn_wave_reduce_add_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_sub_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_i32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_i32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_and_b32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_or_b32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_xor_b32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_add_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_sub_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_i64 : AMDGPUBuiltin<"int64_t(int64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_i64 : AMDGPUBuiltin<"int64_t(int64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_and_b64 : AMDGPUBuiltin<"int64_t(int64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_or_b64 : AMDGPUBuiltin<"int64_t(int64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_xor_b64 : AMDGPUBuiltin<"int64_t(int64_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fadd_f32 : AMDGPUBuiltin<"float(float, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fsub_f32 : AMDGPUBuiltin<"float(float, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmin_f32 : AMDGPUBuiltin<"float(float, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmax_f32 : AMDGPUBuiltin<"float(float, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fadd_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fsub_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmin_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmax_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_add_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_sub_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_i32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_i32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_and_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_or_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_xor_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_add_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_sub_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_i64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_i64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_max_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_and_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_or_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_xor_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fadd_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fsub_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmin_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmax_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;

//===----------------------------------------------------------------------===//
// R600-NI only builtins.
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,6 @@ clang_tablegen(Builtins.inc -gen-clang-builtins
SOURCE Builtins.td
TARGET ClangBuiltins)

clang_tablegen(BuiltinsAMDGPU.inc -gen-clang-builtins
SOURCE BuiltinsAMDGPU.td
TARGET ClangBuiltinsAMDGPU)

clang_tablegen(BuiltinsBPF.inc -gen-clang-builtins
SOURCE BuiltinsBPF.td
TARGET ClangBuiltinsBPF)
Expand Down
5 changes: 2 additions & 3 deletions clang/include/clang/Basic/TargetBuiltins.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,8 @@ namespace clang {
namespace AMDGPU {
enum {
LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
#define GET_BUILTIN_ENUMERATORS
#include "clang/Basic/BuiltinsAMDGPU.inc"
#undef GET_BUILTIN_ENUMERATORS
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "clang/Basic/BuiltinsAMDGPU.def"
LastTSBuiltin
};
}
Expand Down
1 change: 1 addition & 0 deletions clang/include/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ module Clang_Basic {
textual header "clang/Basic/BuiltinsAArch64.def"
textual header "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
textual header "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
textual header "clang/Basic/BuiltinsAMDGPU.def"
textual header "clang/Basic/BuiltinsARM.def"
textual header "clang/Basic/BuiltinsHexagonMapCustomDep.def"
textual header "clang/Basic/BuiltinsLoongArchBase.def"
Expand Down
22 changes: 12 additions & 10 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,18 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
static constexpr int NumBuiltins =
clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;

#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsAMDGPU.inc"
#undef GET_BUILTIN_STR_TABLE

static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsAMDGPU.inc"
#undef GET_BUILTIN_INFOS
};
static_assert(std::size(BuiltinInfos) == NumBuiltins);
static constexpr llvm::StringTable BuiltinStrings =
CLANG_BUILTIN_STR_TABLE_START
#define BUILTIN CLANG_BUILTIN_STR_TABLE
#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsAMDGPU.def"
;

static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
#define BUILTIN CLANG_BUILTIN_ENTRY
#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
#include "clang/Basic/BuiltinsAMDGPU.def"
});

const char *const AMDGPUTargetInfo::GCCRegNames[] = {
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
Expand Down
8 changes: 0 additions & 8 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,19 +386,16 @@ static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
return Intrinsic::amdgcn_wave_reduce_add;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
return Intrinsic::amdgcn_wave_reduce_fadd;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
return Intrinsic::amdgcn_wave_reduce_sub;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
return Intrinsic::amdgcn_wave_reduce_fsub;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
return Intrinsic::amdgcn_wave_reduce_min;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
return Intrinsic::amdgcn_wave_reduce_fmin;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
Expand All @@ -407,7 +404,6 @@ static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
return Intrinsic::amdgcn_wave_reduce_max;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
return Intrinsic::amdgcn_wave_reduce_fmax;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
Expand All @@ -431,18 +427,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
Expand Down
84 changes: 0 additions & 84 deletions clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -412,13 +412,6 @@ void test_wave_reduce_fadd_f32_default(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fadd_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fadd_f64_default
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fadd.f64(
void test_wave_reduce_fadd_f64_default(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fadd_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_add_u32_iterative
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32(
void test_wave_reduce_add_u32_iterative(global int* out, int in)
Expand All @@ -440,13 +433,6 @@ void test_wave_reduce_fadd_f32_iterative(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fadd_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fadd_f64_iterative
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fadd.f64(
void test_wave_reduce_fadd_f64_iterative(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fadd_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_add_u32_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32(
void test_wave_reduce_add_u32_dpp(global int* out, int in)
Expand All @@ -468,13 +454,6 @@ void test_wave_reduce_fadd_f32_dpp(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fadd_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fadd_f64_dpp
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fadd.f64(
void test_wave_reduce_fadd_f64_dpp(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fadd_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_sub_u32_default
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
void test_wave_reduce_sub_u32_default(global int* out, int in)
Expand All @@ -496,13 +475,6 @@ void test_wave_reduce_fsub_f32_default(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fsub_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fsub_f64_default
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fsub.f64(
void test_wave_reduce_fsub_f64_default(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fsub_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_sub_u32_iterative
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
void test_wave_reduce_sub_u32_iterative(global int* out, int in)
Expand All @@ -524,13 +496,6 @@ void test_wave_reduce_fsub_f32_iterative(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fsub_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fsub_f64_iterative
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fsub.f64(
void test_wave_reduce_fsub_f64_iterative(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fsub_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_sub_u32_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
void test_wave_reduce_sub_u32_dpp(global int* out, int in)
Expand All @@ -552,13 +517,6 @@ void test_wave_reduce_fsub_f32_dpp(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fsub_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fsub_f64_dpp
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fsub.f64(
void test_wave_reduce_fsub_f64_dpp(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fsub_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_and_b32_default
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32(
void test_wave_reduce_and_b32_default(global int* out, int in)
Expand Down Expand Up @@ -706,13 +664,6 @@ void test_wave_reduce_fmin_f32_default(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmin_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmin_f64_default
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmin.f64(
void test_wave_reduce_fmin_f64_default(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmin_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_min_i32_iterative
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32(
void test_wave_reduce_min_i32_iterative(global int* out, int in)
Expand All @@ -734,13 +685,6 @@ void test_wave_reduce_fmin_f32_iterative(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmin_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmin_f64_iterative
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmin.f64(
void test_wave_reduce_fmin_f64_iterative(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmin_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_min_i32_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32(
void test_wave_reduce_min_i32_dpp(global int* out, int in)
Expand All @@ -762,13 +706,6 @@ void test_wave_reduce_fmin_f32_dpp(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmin_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmin_f64_dpp
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmin.f64(
void test_wave_reduce_fmin_f64_dpp(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmin_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_min_u32_default
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32(
void test_wave_reduce_min_u32_default(global int* out, int in)
Expand Down Expand Up @@ -832,13 +769,6 @@ void test_wave_reduce_fmax_f32_default(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmax_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmax_f64_default
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmax.f64(
void test_wave_reduce_fmax_f64_default(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmax_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_max_i32_iterative
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32(
void test_wave_reduce_max_i32_iterative(global int* out, int in)
Expand All @@ -860,13 +790,6 @@ void test_wave_reduce_fmax_f32_iterative(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmax_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmax_f64_iterative
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmax.f64(
void test_wave_reduce_fmax_f64_iterative(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmax_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_max_i32_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32(
void test_wave_reduce_max_i32_dpp(global int* out, int in)
Expand All @@ -888,13 +811,6 @@ void test_wave_reduce_fmax_f32_dpp(global float* out, float in)
*out = __builtin_amdgcn_wave_reduce_fmax_f32(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_fmax_f64_dpp
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.wave.reduce.fmax.f64(
void test_wave_reduce_fmax_f64_dpp(global double* out, double in)
{
*out = __builtin_amdgcn_wave_reduce_fmax_f64(in, 0);
}

// CHECK-LABEL: @test_wave_reduce_max_u32_default
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32(
void test_wave_reduce_max_u32_default(global int* out, int in)
Expand Down
Loading
Loading