|
33 | 33 | #if defined(NEON_INTRINSICS_ENABLED)
|
34 | 34 | #include <arm_neon.h>
|
35 | 35 |
|
/*
 * vuzp1q_u8 is an A64 intrinsic. A plain `#ifndef` only detects it when the
 * toolchain header implements it as a macro; headers that provide it as an
 * inline function would clash with this definition, so the architecture is
 * checked as well.
 */
#if !defined(vuzp1q_u8) && !defined(__aarch64__) && !defined(_M_ARM64)
/**
 * Fallback for the A64 UZP1 intrinsic on targets that lack it.
 *
 * Matches the intrinsic's documented lane layout (unzip, not interleave):
 *   result[0..7]  = a[0], a[2], ..., a[14]
 *   result[8..15] = b[0], b[2], ..., b[14]
 *
 * @param a first source vector, supplies the low half of the result
 * @param b second source vector, supplies the high half of the result
 * @return the even-numbered bytes of a followed by the even-numbered bytes of b
 */
static inline uint8x16_t vuzp1q_u8(uint8x16_t a, uint8x16_t b)
{
	/* vuzpq_u8 de-interleaves both inputs in one step; val[0] carries the even lanes. */
	const uint8x16x2_t unzipped = vuzpq_u8(a, b);
	return unzipped.val[0];
}
#endif
| 60 | + |
/*
 * vuzp2q_u8 is an A64 intrinsic. A plain `#ifndef` only detects it when the
 * toolchain header implements it as a macro; headers that provide it as an
 * inline function would clash with this definition, so the architecture is
 * checked as well.
 */
#if !defined(vuzp2q_u8) && !defined(__aarch64__) && !defined(_M_ARM64)
/**
 * Fallback for the A64 UZP2 intrinsic on targets that lack it.
 *
 * Matches the intrinsic's documented lane layout (unzip, not interleave):
 *   result[0..7]  = a[1], a[3], ..., a[15]
 *   result[8..15] = b[1], b[3], ..., b[15]
 *
 * @param a first source vector, supplies the low half of the result
 * @param b second source vector, supplies the high half of the result
 * @return the odd-numbered bytes of a followed by the odd-numbered bytes of b
 */
static inline uint8x16_t vuzp2q_u8(uint8x16_t a, uint8x16_t b)
{
	/* vuzpq_u8 de-interleaves both inputs in one step; val[1] carries the odd lanes. */
	const uint8x16x2_t unzipped = vuzpq_u8(a, b);
	return unzipped.val[1];
}
#endif
| 85 | + |
/* File-scope pointer to a primitives_t, NULL until something assigns it.
 * NOTE(review): presumably refers to the generic (non-NEON) primitives used
 * as a fallback - confirm against the rest of the file. */
36 | 86 | static primitives_t* generic = NULL;
|
37 | 87 |
|
38 | 88 | static INLINE uint8x8_t neon_YUV2R_single(uint16x8_t C, int16x8_t D, int16x8_t E)
|
|
0 commit comments