Skip to content

Commit 65b7c8d

Browse files
author
Soumya AR
committed
aarch64: Extend SVE2 bit-select instructions for Neon modes.
NBSL, BSL1N, and BSL2N are bit-select instructions on SVE2 with certain operands inverted. These can be extended to work with Neon modes. Since these instructions are unpredicated, duplicate patterns were added with the predicate removed to generate these instructions for Neon modes. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. Signed-off-by: Soumya AR <[email protected]> gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (*aarch64_sve2_nbsl_unpred<mode>): New pattern to match unpredicated form. (*aarch64_sve2_bsl1n_unpred<mode>): Likewise. (*aarch64_sve2_bsl2n_unpred<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/bitsel.c: New test.
1 parent ee2f19b commit 65b7c8d

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

gcc/config/aarch64/aarch64-sve2.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,6 +1732,23 @@
17321732
}
17331733
)
17341734

1735+
;; Unpredicated inverted bitwise select (NBSL) for the VDQ_I modes.
;; Matches ~(((op1 ^ op2) & op3) ^ BSL_DUP) with no governing predicate
;; and emits a single NBSL; requires TARGET_SVE2.
;; The first alternative reuses %Z0 as the first source of the NBSL.  The
;; second, disparaged earlyclobber alternative (?&w) first copies
;; <bsl_mov> into operand 0 with MOVPRFX and then issues the same NBSL.
;; NOTE(review): BSL_DUP presumably repeats operand 1 or 2, making this
;; ~(op3 ? bsl_mov : bsl_dup) -- confirm against the BSL_DUP iterator.
(define_insn "*aarch64_sve2_nbsl_unpred<mode>"
1736+
[(set (match_operand:VDQ_I 0 "register_operand")
1737+
(not:VDQ_I
1738+
(xor:VDQ_I
1739+
(and:VDQ_I
1740+
(xor:VDQ_I
1741+
(match_operand:VDQ_I 1 "register_operand")
1742+
(match_operand:VDQ_I 2 "register_operand"))
1743+
(match_operand:VDQ_I 3 "register_operand"))
1744+
(match_dup BSL_DUP))))]
1745+
"TARGET_SVE2"
1746+
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1747+
[ w , <bsl_1st> , <bsl_2nd> , w ; * ] nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1748+
[ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1749+
}
1750+
)
1751+
17351752
;; Unpredicated bitwise select with inverted first operand.
17361753
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
17371754
(define_expand "@aarch64_sve2_bsl1n<mode>"
@@ -1777,6 +1794,23 @@
17771794
}
17781795
)
17791796

1797+
;; Unpredicated bitwise select with inverted first operand (BSL1N) for the
;; VDQ_I modes.
;; Matches ((~(op1 ^ op2)) & op3) ^ BSL_DUP with no governing predicate
;; and emits a single BSL1N; requires TARGET_SVE2.
;; The first alternative reuses %Z0 as the first source of the BSL1N.  The
;; second, disparaged earlyclobber alternative (?&w) first copies
;; <bsl_mov> into operand 0 with MOVPRFX and then issues the same BSL1N.
;; NOTE(review): BSL_DUP presumably repeats operand 1 or 2, making this
;; (op3 ? ~bsl_mov : bsl_dup) -- confirm against the BSL_DUP iterator.
(define_insn "*aarch64_sve2_bsl1n_unpred<mode>"
1798+
[(set (match_operand:VDQ_I 0 "register_operand")
1799+
(xor:VDQ_I
1800+
(and:VDQ_I
1801+
(not:VDQ_I
1802+
(xor:VDQ_I
1803+
(match_operand:VDQ_I 1 "register_operand")
1804+
(match_operand:VDQ_I 2 "register_operand")))
1805+
(match_operand:VDQ_I 3 "register_operand"))
1806+
(match_dup BSL_DUP)))]
1807+
"TARGET_SVE2"
1808+
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1809+
[ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1810+
[ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1811+
}
1812+
)
1813+
17801814
;; Unpredicated bitwise select with inverted second operand.
17811815
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
17821816
(define_expand "@aarch64_sve2_bsl2n<mode>"
@@ -1851,6 +1885,38 @@
18511885
}
18521886
)
18531887

1888+
;; Unpredicated bitwise select with inverted second operand (BSL2N) for
;; the VDQ_I modes.
;; Matches (op1 & op2) | (~op3 & ~BSL_DUP) with no governing predicate and
;; emits a single BSL2N; requires TARGET_SVE2.
;; Note the source order in the output template: operand 3 comes before
;; <bsl_dup>, unlike the NBSL and BSL1N templates.
;; The second, disparaged earlyclobber alternative (?&w) first copies
;; <bsl_mov> into operand 0 with MOVPRFX and then issues the same BSL2N.
;; NOTE(review): BSL_DUP presumably repeats operand 1 or 2, making this
;; (bsl_dup ? bsl_mov : ~op3) -- confirm against the BSL_DUP iterator.
(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1889+
[(set (match_operand:VDQ_I 0 "register_operand")
1890+
(ior:VDQ_I
1891+
(and:VDQ_I
1892+
(match_operand:VDQ_I 1 "register_operand")
1893+
(match_operand:VDQ_I 2 "register_operand"))
1894+
(and:VDQ_I
1895+
(not:VDQ_I (match_operand:VDQ_I 3 "register_operand"))
1896+
(not:VDQ_I (match_dup BSL_DUP)))))]
1897+
"TARGET_SVE2"
1898+
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1899+
[ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1900+
[ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1901+
}
1902+
)
1903+
1904+
;; Second unpredicated BSL2N form for the VDQ_I modes.
;; Matches (op1 & op2) | (~BSL_DUP & ~op3): the same operation as the
;; pattern directly above, but with the two NOT operands of the second AND
;; in the opposite order.  The alternatives and output templates are
;; identical to that pattern.
;; NOTE(review): presumably both orders are needed because either can
;; reach the matcher -- confirm against the predicated BSL2N patterns.
(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1905+
[(set (match_operand:VDQ_I 0 "register_operand")
1906+
(ior:VDQ_I
1907+
(and:VDQ_I
1908+
(match_operand:VDQ_I 1 "register_operand")
1909+
(match_operand:VDQ_I 2 "register_operand"))
1910+
(and:VDQ_I
1911+
(not:VDQ_I (match_dup BSL_DUP))
1912+
(not:VDQ_I (match_operand:VDQ_I 3 "register_operand")))))]
1913+
"TARGET_SVE2"
1914+
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1915+
[ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1916+
[ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1917+
}
1918+
)
1919+
18541920
;; -------------------------------------------------------------------------
18551921
;; ---- [INT] Shift-and-accumulate operations
18561922
;; -------------------------------------------------------------------------
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* { dg-options "-O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=asimd-only" } */
2+
3+
#include <stdint.h>
4+
5+
/* Scalar forms of the three bit-select variants under test.  In each
   macro Z is the per-bit selector:
     OPNBSL:  ~(Z ? X : Y)   -- whole select inverted.
     OPBSL1N:  Z ? ~X : Y    -- first input inverted.
     OPBSL2N:  Z ? X : ~Y    -- second input inverted.  */
#define OPNBSL(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
6+
#define OPBSL1N(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
7+
#define OPBSL2N(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
8+
9+
/* Number of elements processed by each test loop.  */
#define N 1024
10+
11+
/* Signed fixed-width integer type with the given bit width, e.g.
   TYPE(8) is int8_t.  The parameter N here is the macro's own argument
   and is unrelated to the element count N defined above.  */
#define TYPE(N) int##N##_t
12+
13+
#define TEST(SIZE, OP, SUFFIX) \
14+
void __attribute__ ((noinline, noclone)) \
15+
f_##SIZE##_##SUFFIX \
16+
(TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
17+
TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d) \
18+
{ \
19+
for (int i = 0; i < N; i++) \
20+
a[i] = OP (b[i], c[i], d[i]); \
21+
}
22+
23+
/* Instantiate the NBSL, BSL1N and BSL2N test functions for one element
   width.  */
#define TEST_ALL(SIZE) \
24+
TEST(SIZE, OPNBSL, nbsl) \
25+
TEST(SIZE, OPBSL1N, bsl1n) \
26+
TEST(SIZE, OPBSL2N, bsl2n)
27+
28+
/* Four element widths, so four functions per operation; this matches the
   count of 4 in each scan-assembler-times directive below.  */
TEST_ALL(8);
29+
TEST_ALL(16);
30+
TEST_ALL(32);
31+
TEST_ALL(64);
32+
33+
/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
34+
/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
35+
/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */

0 commit comments

Comments
 (0)