Skip to content

[Clang][AArch64] Add missing builtins for __ARM_FEATURE_SME2p1. #147362

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 77 additions & 73 deletions clang/include/clang/Basic/arm_sve.td

Large diffs are not rendered by default.

47 changes: 26 additions & 21 deletions clang/lib/Sema/SemaARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
CallerFeatureMapWithoutSVE["sve"] = false;
llvm::StringMap<bool> CallerFeatures;
S.Context.getFunctionFeatureMap(CallerFeatures, FD);

// Avoid emitting diagnostics for a function that can never compile.
if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;

llvm::StringMap<bool> CallerFeatureMapWithoutSME;
S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
CallerFeatureMapWithoutSME["sme"] = false;
// Returns the index of the first '|' in S that is not nested inside
// parentheses, or strlen(S) if there is no top-level '|'. Used to split a
// builtin's required-features string of the form 'SVE-EXPR|SME-EXPR' into
// its non-streaming and streaming guards (the caller asserts a pipe exists).
const auto FindTopLevelPipe = [](const char *S) {
// Depth tracks parenthesis nesting so a '|' inside '(...)' is skipped.
unsigned Depth = 0;
unsigned I = 0, E = strlen(S);
for (; I < E; ++I) {
if (S[I] == '|' && Depth == 0)
break;
if (S[I] == '(')
++Depth;
else if (S[I] == ')')
--Depth;
}
// I == E means no top-level pipe was found; the caller treats that as a
// malformed guard string (see the assert on the result).
return I;
};

const char *RequiredFeatures =
S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
"Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);

// We know the builtin requires either some combination of SVE flags, or
// some combination of SME flags, but we need to figure out which part
// of the required features is satisfied by the target features.
//
// For a builtin with target guard 'sve2p1|sme2', if we compile with
// '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
// evaluate the features for '+sve2p1,+sme,+nosme'.
//
// Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
// the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
StringRef BuiltinTargetGuards(
S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
BuiltinTargetGuards, CallerFeatureMapWithoutSME);
NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
StreamingBuiltinGuard, CallerFeatures);

if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
Expand Down
22 changes: 16 additions & 6 deletions clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand All @@ -15,6 +17,14 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// Pick the streaming-mode attribute for the test functions so the same
// source compiles under every RUN line: with both SME and SVE the builtins
// are callable from either mode (__arm_streaming_compatible); with SME only,
// callers must be in streaming mode (__arm_streaming); with SVE only, no
// attribute is needed (non-streaming is the default).
#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
#define ATTR __arm_streaming_compatible
#elif defined(__ARM_FEATURE_SME)
#define ATTR __arm_streaming
#else
#define ATTR
#endif

// CHECK-LABEL: @test_svrax1_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
Expand All @@ -25,7 +35,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2) ATTR
{
return SVE_ACLE_FUNC(svrax1,_s64,,)(op1, op2);
}
Expand All @@ -40,7 +50,7 @@ svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2)
svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2) ATTR
{
return SVE_ACLE_FUNC(svrax1,_u64,,)(op1, op2);
}
40 changes: 27 additions & 13 deletions clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
Expand All @@ -20,6 +26,14 @@
#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif

// Pick the streaming-mode attribute for the test functions so the same
// source compiles under every RUN line: with both SME and SVE the builtins
// are callable from either mode (__arm_streaming_compatible); with SME only,
// callers must be in streaming mode (__arm_streaming); with SVE only, no
// attribute is needed (non-streaming is the default).
#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
#define ATTR __arm_streaming_compatible
#elif defined(__ARM_FEATURE_SME)
#define ATTR __arm_streaming
#else
#define ATTR
#endif

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
Expand All @@ -32,7 +46,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svdup_laneq_s8(svint8_t zn) {
svint8_t test_svdup_laneq_s8(svint8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
}

Expand All @@ -48,7 +62,7 @@ svint8_t test_svdup_laneq_s8(svint8_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
svuint8_t test_svdup_laneq_u8(svuint8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
}

Expand All @@ -64,7 +78,7 @@ svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svdup_laneq_s16(svint16_t zn) {
svint16_t test_svdup_laneq_s16(svint16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
}

Expand All @@ -80,7 +94,7 @@ svint16_t test_svdup_laneq_s16(svint16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
svuint16_t test_svdup_laneq_u16(svuint16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
}

Expand All @@ -96,7 +110,7 @@ svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svdup_laneq_s32(svint32_t zn) {
svint32_t test_svdup_laneq_s32(svint32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
}

Expand All @@ -112,7 +126,7 @@ svint32_t test_svdup_laneq_s32(svint32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
svuint32_t test_svdup_laneq_u32(svuint32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
}

Expand All @@ -128,7 +142,7 @@ svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svdup_laneq_s64(svint64_t zn) {
svint64_t test_svdup_laneq_s64(svint64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
}

Expand All @@ -144,7 +158,7 @@ svint64_t test_svdup_laneq_s64(svint64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
svuint64_t test_svdup_laneq_u64(svuint64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
}

Expand All @@ -160,7 +174,7 @@ svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
}

Expand All @@ -176,7 +190,7 @@ svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
}

Expand All @@ -192,7 +206,7 @@ svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
}

Expand All @@ -208,7 +222,7 @@ svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
}

Expand All @@ -224,6 +238,6 @@ svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) {
svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _mf8)(zn, 1);
}
Loading
Loading