Skip to content

[Clang][AArch64] Add missing builtins for __ARM_FEATURE_SME2p1. #147362

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 77 additions & 73 deletions clang/include/clang/Basic/arm_sve.td

Large diffs are not rendered by default.

47 changes: 26 additions & 21 deletions clang/lib/Sema/SemaARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
CallerFeatureMapWithoutSVE["sve"] = false;
llvm::StringMap<bool> CallerFeatures;
S.Context.getFunctionFeatureMap(CallerFeatures, FD);

// Avoid emitting diagnostics for a function that can never compile.
if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;

llvm::StringMap<bool> CallerFeatureMapWithoutSME;
S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
CallerFeatureMapWithoutSME["sme"] = false;
// Returns the index of the first '|' in S that is not nested inside
// parentheses, or strlen(S) if there is no top-level '|'. Used to split a
// builtin's required-features string of the form 'SVE-EXPR|SME-EXPR' into
// its non-streaming and streaming guards (the caller asserts a pipe exists).
const auto FindTopLevelPipe = [](const char *S) {
// Depth tracks parenthesis nesting so a '|' inside '(...)' is skipped.
unsigned Depth = 0;
unsigned I = 0, E = strlen(S);
for (; I < E; ++I) {
if (S[I] == '|' && Depth == 0)
break;
if (S[I] == '(')
++Depth;
else if (S[I] == ')')
--Depth;
}
// I == E means no top-level pipe was found; the caller treats that as a
// malformed guard string (see the assert on the result).
return I;
};

const char *RequiredFeatures =
S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
"Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);

// We know the builtin requires either some combination of SVE flags, or
// some combination of SME flags, but we need to figure out which part
// of the required features is satisfied by the target features.
//
// For a builtin with target guard 'sve2p1|sme2', if we compile with
// '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
// evaluate the features for '+sve2p1,+sme,+nosme'.
//
// Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
// the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
StringRef BuiltinTargetGuards(
S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
BuiltinTargetGuards, CallerFeatureMapWithoutSME);
NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
StreamingBuiltinGuard, CallerFeatures);

if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
Expand Down
22 changes: 16 additions & 6 deletions clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand All @@ -15,6 +17,14 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// Pick the streaming-mode attribute for the test functions so the same
// source compiles under every RUN line: with both SME and SVE the builtins
// are callable from either mode (__arm_streaming_compatible); with SME only,
// callers must be in streaming mode (__arm_streaming); with SVE only, no
// attribute is needed (non-streaming is the default).
#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
#define ATTR __arm_streaming_compatible
#elif defined(__ARM_FEATURE_SME)
#define ATTR __arm_streaming
#else
#define ATTR
#endif

// CHECK-LABEL: @test_svrax1_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
Expand All @@ -25,7 +35,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2) ATTR
{
return SVE_ACLE_FUNC(svrax1,_s64,,)(op1, op2);
}
Expand All @@ -40,7 +50,7 @@ svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2)
svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2) ATTR
{
return SVE_ACLE_FUNC(svrax1,_u64,,)(op1, op2);
}
40 changes: 27 additions & 13 deletions clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
// RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
Expand All @@ -20,6 +26,14 @@
#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif

// Pick the streaming-mode attribute for the test functions so the same
// source compiles under every RUN line: with both SME and SVE the builtins
// are callable from either mode (__arm_streaming_compatible); with SME only,
// callers must be in streaming mode (__arm_streaming); with SVE only, no
// attribute is needed (non-streaming is the default).
#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
#define ATTR __arm_streaming_compatible
#elif defined(__ARM_FEATURE_SME)
#define ATTR __arm_streaming
#else
#define ATTR
#endif

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
Expand All @@ -32,7 +46,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svdup_laneq_s8(svint8_t zn) {
svint8_t test_svdup_laneq_s8(svint8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
}

Expand All @@ -48,7 +62,7 @@ svint8_t test_svdup_laneq_s8(svint8_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
svuint8_t test_svdup_laneq_u8(svuint8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
}

Expand All @@ -64,7 +78,7 @@ svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svdup_laneq_s16(svint16_t zn) {
svint16_t test_svdup_laneq_s16(svint16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
}

Expand All @@ -80,7 +94,7 @@ svint16_t test_svdup_laneq_s16(svint16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
svuint16_t test_svdup_laneq_u16(svuint16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
}

Expand All @@ -96,7 +110,7 @@ svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svdup_laneq_s32(svint32_t zn) {
svint32_t test_svdup_laneq_s32(svint32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
}

Expand All @@ -112,7 +126,7 @@ svint32_t test_svdup_laneq_s32(svint32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
svuint32_t test_svdup_laneq_u32(svuint32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
}

Expand All @@ -128,7 +142,7 @@ svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svdup_laneq_s64(svint64_t zn) {
svint64_t test_svdup_laneq_s64(svint64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
}

Expand All @@ -144,7 +158,7 @@ svint64_t test_svdup_laneq_s64(svint64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
svuint64_t test_svdup_laneq_u64(svuint64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
}

Expand All @@ -160,7 +174,7 @@ svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
}

Expand All @@ -176,7 +190,7 @@ svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
}

Expand All @@ -192,7 +206,7 @@ svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
}

Expand All @@ -208,7 +222,7 @@ svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
}

Expand All @@ -224,6 +238,6 @@ svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) {
svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) ATTR {
return SVE_ACLE_FUNC(svdup_laneq, _mf8)(zn, 1);
}
Loading
Loading