Skip to content

Commit 1a4430f

Browse files
q66 authored and ebassi committed
Implement vector intrinsics for Clang
This is disabled on 32-bit targets, in the same way as it is for GCC builds, in order to produce correctly functioning builds that can be used across compilers.
1 parent 779a662 commit 1a4430f

File tree

8 files changed

+55
-38
lines changed

8 files changed

+55
-38
lines changed

doc/graphene-sections.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,10 +631,12 @@ GRAPHENE_MINOR_VERSION
631631
GRAPHENE_MICRO_VERSION
632632
<SUBSECTION Standard>
633633
GRAPHENE_HAS_GCC
634+
GRAPHENE_HAS_INTRINSICS
634635
GRAPHENE_HAS_SCALAR
635636
GRAPHENE_HAS_SSE
636637
GRAPHENE_HAS_ARM_NEON
637638
GRAPHENE_USE_GCC
639+
GRAPHENE_USE_INTRINSICS
638640
GRAPHENE_USE_SCALAR
639641
GRAPHENE_USE_SSE
640642
GRAPHENE_USE_ARM_NEON

include/graphene-config.h.meson

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ extern "C" {
2525

2626
# if defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) && !defined(__arm__)
2727
#mesondefine GRAPHENE_HAS_GCC
28+
#mesondefine GRAPHENE_HAS_INTRINSICS
29+
# endif
30+
31+
# if defined(__clang__) && !defined(__arm__)
32+
#mesondefine GRAPHENE_HAS_INTRINSICS
2833
# endif
2934

3035
# define GRAPHENE_HAS_SCALAR 1
@@ -36,9 +41,12 @@ extern "C" {
3641
# elif defined(GRAPHENE_HAS_ARM_NEON)
3742
# define GRAPHENE_USE_ARM_NEON
3843
# define GRAPHENE_SIMD_S "neon"
39-
# elif defined(GRAPHENE_HAS_GCC)
40-
# define GRAPHENE_USE_GCC
41-
# define GRAPHENE_SIMD_S "gcc"
44+
# elif defined(GRAPHENE_HAS_INTRINSICS)
45+
# if defined(GRAPHENE_HAS_GCC)
46+
# define GRAPHENE_USE_GCC
47+
# endif
48+
# define GRAPHENE_USE_INTRINSICS
49+
# define GRAPHENE_SIMD_S "intrinsics"
4250
# elif defined(GRAPHENE_HAS_SCALAR)
4351
# define GRAPHENE_USE_SCALAR
4452
# define GRAPHENE_SIMD_S "scalar"
@@ -69,7 +77,7 @@ typedef __m128 graphene_simd4f_t;
6977
# include <arm_neon.h>
7078
# endif
7179
typedef float32x4_t graphene_simd4f_t;
72-
# elif defined(GRAPHENE_USE_GCC)
80+
# elif defined(GRAPHENE_USE_INTRINSICS)
7381
typedef float graphene_simd4f_t __attribute__((vector_size(16)));
7482
# elif defined(GRAPHENE_USE_SCALAR)
7583
typedef struct {

include/graphene-simd4f.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -779,12 +779,20 @@ _simd4f_neg (const graphene_simd4f_t s)
779779

780780
# endif /* !__GNUC__ && !_MSC_VER */
781781

782-
#elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_GCC)
782+
#elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_INTRINSICS)
783783

784-
/* GCC vector intrinsic implementation of SIMD 4f */
784+
/* GCC/Clang vector intrinsic implementation of SIMD 4f */
785785

786786
typedef int graphene_simd4i_t __attribute__((vector_size (16)));
787787

788+
#if defined(__clang__)
789+
#define __graphene_simd_shuffle1(a, m1, m2, m3, m4) __builtin_shufflevector(a, a, m1, m2, m3, m4)
790+
#define __graphene_simd_shuffle2(a, b, m1, m2, m3, m4) __builtin_shufflevector(a, b, m1, m2, m3, m4)
791+
#else
792+
#define __graphene_simd_shuffle1(a, m1, m2, m3, m4) __builtin_shuffle(a, (const graphene_simd4i_t){m1, m2, m3, m4})
793+
#define __graphene_simd_shuffle2(a, b, m1, m2, m3, m4) __builtin_shuffle(a, b, (const graphene_simd4i_t){m1, m2, m3, m4})
794+
#endif
795+
788796
# define graphene_simd4f_init(x,y,z,w) \
789797
(__extension__ ({ \
790798
(graphene_simd4f_t) { (x), (y), (z), (w) }; \
@@ -944,50 +952,42 @@ typedef int graphene_simd4i_t __attribute__((vector_size (16)));
944952

945953
# define graphene_simd4f_shuffle_wxyz(v) \
946954
(__extension__ ({ \
947-
const graphene_simd4i_t __mask = { 3, 0, 1, 2 }; \
948-
(graphene_simd4f_t) __builtin_shuffle ((v), __mask); \
955+
(graphene_simd4f_t) __graphene_simd_shuffle1((v), 3, 0, 1, 2); \
949956
}))
950957

951958
# define graphene_simd4f_shuffle_zwxy(v) \
952959
(__extension__ ({ \
953-
const graphene_simd4i_t __mask = { 2, 3, 0, 1 }; \
954-
(graphene_simd4f_t) __builtin_shuffle ((v), __mask); \
960+
(graphene_simd4f_t) __graphene_simd_shuffle1((v), 2, 3, 0, 1); \
955961
}))
956962

957963
# define graphene_simd4f_shuffle_yzwx(v) \
958964
(__extension__ ({ \
959-
const graphene_simd4i_t __mask = { 1, 2, 3, 0 }; \
960-
(graphene_simd4f_t) __builtin_shuffle ((v), __mask); \
965+
(graphene_simd4f_t) __graphene_simd_shuffle1((v), 1, 2, 3, 0); \
961966
}))
962967

963968
# define graphene_simd4f_zero_w(v) \
964969
(__extension__ ({ \
965-
const graphene_simd4i_t __mask = { 0, 1, 2, 4 }; \
966-
(graphene_simd4f_t) __builtin_shuffle ((v), graphene_simd4f_init_zero (), __mask); \
970+
(graphene_simd4f_t) __graphene_simd_shuffle2((v), graphene_simd4f_init_zero (), 0, 1, 2, 4); \
967971
}))
968972

969973
# define graphene_simd4f_zero_zw(v) \
970974
(__extension__ ({ \
971-
const graphene_simd4i_t __mask = { 0, 1, 4, 4 }; \
972-
(graphene_simd4f_t) __builtin_shuffle ((v), graphene_simd4f_init_zero (), __mask); \
975+
(graphene_simd4f_t) __graphene_simd_shuffle2((v), graphene_simd4f_init_zero (), 0, 1, 4, 4); \
973976
}))
974977

975978
# define graphene_simd4f_merge_w(s,v) \
976979
(__extension__ ({ \
977-
const graphene_simd4i_t __mask = { 0, 1, 2, 4 }; \
978-
(graphene_simd4f_t) __builtin_shuffle ((s), graphene_simd4f_splat ((v)), __mask); \
980+
(graphene_simd4f_t) __graphene_simd_shuffle2((s), graphene_simd4f_splat ((v)), 0, 1, 2, 4); \
979981
}))
980982

981983
# define graphene_simd4f_merge_high(a,b) \
982984
(__extension__ ({ \
983-
const graphene_simd4i_t __mask = { 2, 3, 6, 7 }; \
984-
(graphene_simd4f_t) __builtin_shuffle ((a), (b), __mask); \
985+
(graphene_simd4f_t) __graphene_simd_shuffle2((a), (b), 2, 3, 6, 7); \
985986
}))
986987

987988
# define graphene_simd4f_merge_low(a,b) \
988989
(__extension__ ({ \
989-
const graphene_simd4i_t __mask = { 0, 1, 4, 5 }; \
990-
(graphene_simd4f_t) __builtin_shuffle ((a), (b), __mask); \
990+
(graphene_simd4f_t) __graphene_simd_shuffle2((a), (b), 0, 1, 4, 5); \
991991
}))
992992

993993
# define graphene_simd4f_flip_sign_0101(v) \

include/graphene-simd4x4f.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ void graphene_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s);
157157
_MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w)
158158
#endif
159159

160-
#elif defined(GRAPHENE_USE_GCC)
160+
#elif defined(GRAPHENE_USE_INTRINSICS)
161161

162162
#define graphene_simd4x4f_transpose_in_place(s) \
163163
(__extension__ ({ \

meson.build

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -326,31 +326,38 @@ int main () {
326326
endif
327327
endif
328328

329-
# GCC vector intrinsics
329+
# GCC/Clang vector intrinsics
330330
if get_option('gcc_vector')
331-
gcc_vector_prog = '''
332-
#if defined(__GNUC__)
333-
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
334-
# error "GCC vector intrinsics are disabled on GCC prior to 4.9"
331+
vector_intrin_prog = '''
332+
#if defined(__GNUC__) // Clang advertises __GNUC__ too
333+
# if __GNUC__ < 4 || ((__GNUC__ == 4 && __GNUC_MINOR__ < 9) && !defined(__clang__))
334+
# error "GCC/Clang vector intrinsics are disabled on GCC prior to 4.9"
335335
# elif defined(__arm__)
336-
# error "GCC vector intrinsics are disabled on ARM"
336+
# error "GCC/Clang vector intrinsics are disabled on ARM"
337337
# elif (__SIZEOF_POINTER__ < 8)
338-
# error "GCC vector intrinsics are disabled on 32bit"
338+
# error "GCC/Clang vector intrinsics are disabled on 32bit"
339339
# endif
340340
#else
341-
# error "Need GCC for GCC vectors intrinsics"
341+
# error "Need GCC or Clang for vector intrinsics"
342342
#endif
343343
typedef float simd4f __attribute__((vector_size(16)));
344344
typedef int simd4i __attribute__((vector_size(16)));
345345
int main () {
346346
simd4f s = { 1.f, 2.f, 3.f, 4.f };
347+
#ifdef __clang__
348+
simd4f r = __builtin_shufflevector (s, s, 0, 1, 1, 3);
349+
#else
347350
simd4i m = { 0, 1, 1, 3 };
348351
simd4f r = __builtin_shuffle (s, m);
352+
#endif
349353
return 0;
350354
}'''
351-
if cc.compiles(gcc_vector_prog, name: 'GCC vector intrinsics')
352-
graphene_conf.set('GRAPHENE_HAS_GCC', 1)
353-
graphene_simd += [ 'gcc' ]
355+
if cc.compiles(vector_intrin_prog, name: 'Vector intrinsics')
356+
if cc.get_id() == 'gcc'
357+
graphene_conf.set('GRAPHENE_HAS_GCC', 1)
358+
endif
359+
graphene_conf.set('GRAPHENE_HAS_INTRINSICS', 1)
360+
graphene_simd += [ 'intrinsics' ]
354361
endif
355362
endif
356363

@@ -439,7 +446,7 @@ summary({
439446

440447
summary({
441448
'SSE': graphene_simd.contains('sse2'),
442-
'GCC vector': graphene_simd.contains('gcc'),
449+
'GCC/Clang vector': graphene_simd.contains('intrinsics'),
443450
'ARM NEON': graphene_simd.contains('neon'),
444451
},
445452
section: 'SIMD',

meson_options.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ option('introspection', type: 'feature',
1010
description: 'Enable GObject Introspection (depends on GObject)')
1111
option('gcc_vector', type: 'boolean',
1212
value: true,
13-
description: 'Enable GCC vector fast paths (requires GCC)')
13+
description: 'Enable compiler vector intrinsics (requires GCC or Clang)')
1414
option('sse2', type: 'boolean',
1515
value: true,
1616
description: 'Enable SSE2 fast paths (requires SSE2 or later)')

src/graphene-simd4f.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
*/
5959

6060
/* fast paths are all defined in the graphene-simd4f.h header */
61-
#if defined(GRAPHENE_USE_SSE) || defined(GRAPHENE_USE_GCC) || defined(GRAPHENE_USE_ARM_NEON)
61+
#if defined(GRAPHENE_USE_SSE) || defined(GRAPHENE_USE_INTRINSICS) || defined(GRAPHENE_USE_ARM_NEON)
6262

6363
/**
6464
* graphene_simd4f_init:

src/graphene-simd4x4f.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#include <string.h>
4444
#include <math.h>
4545

46-
#if defined(GRAPHENE_USE_SSE) || defined(GRAPHENE_USE_GCC) || defined(GRAPHENE_USE_ARM_NEON)
46+
#if defined(GRAPHENE_USE_SSE) || defined(GRAPHENE_USE_INTRINSICS) || defined(GRAPHENE_USE_ARM_NEON)
4747

4848
/**
4949
* graphene_simd4x4f_transpose_in_place:

0 commit comments

Comments
 (0)