diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 32f144bc7a..319cef2b1a 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -298,46 +298,40 @@ pub fn vabsq_f64(a: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
 pub fn vabs_s64(a: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v1i64"
-        )]
-        fn _vabs_s64(a: int64x1_t) -> int64x1_t;
+    unsafe {
+        let neg: int64x1_t = simd_neg(a);
+        let mask: int64x1_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabs_s64(a) }
 }
 #[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
-pub fn vabsd_s64(a: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.i64"
-        )]
-        fn _vabsd_s64(a: i64) -> i64;
+pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+    unsafe {
+        let neg: int64x2_t = simd_neg(a);
+        let mask: int64x2_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabsd_s64(a) }
 }
 #[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
-pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+pub fn vabsd_s64(a: i64) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v2i64"
+            link_name = "llvm.aarch64.neon.abs.i64"
         )]
-        fn _vabsq_s64(a: int64x2_t) -> int64x2_t;
+        fn _vabsd_s64(a: i64) -> i64;
     }
-    unsafe { _vabsq_s64(a) }
+    unsafe { _vabsd_s64(a) }
 }
 #[doc = "Add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
@@ -604,14 +598,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_s32(a: int32x2_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
-        )]
-        fn _vaddv_s32(a: int32x2_t) -> i32;
-    }
-    unsafe { _vaddv_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -620,14 +607,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s8(a: int8x8_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v8i8"
-        )]
-        fn _vaddv_s8(a: int8x8_t) -> i8;
-    }
-    unsafe { _vaddv_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -636,14 +616,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s8(a: int8x16_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v16i8"
-        )]
-        fn _vaddvq_s8(a: int8x16_t) -> i8;
-    }
-    unsafe { _vaddvq_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -652,14 +625,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s16(a: int16x4_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v4i16"
-        )]
-        fn _vaddv_s16(a: int16x4_t) -> i16;
-    }
-    unsafe { _vaddv_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -668,14 +634,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s16(a: int16x8_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v8i16"
-        )]
-        fn _vaddvq_s16(a: int16x8_t) -> i16;
-    }
-    unsafe { _vaddvq_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -684,14 +643,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s32(a: int32x4_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
-        )]
-        fn _vaddvq_s32(a: int32x4_t) -> i32;
-    }
-    unsafe { _vaddvq_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -700,14 +652,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_u32(a: uint32x2_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
-        )]
-        fn _vaddv_u32(a: uint32x2_t) -> u32;
-    }
-    unsafe { _vaddv_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -716,14 +661,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u8(a: uint8x8_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v8i8"
-        )]
-        fn _vaddv_u8(a: uint8x8_t) -> u8;
-    }
-    unsafe { _vaddv_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -732,14 +670,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v16i8"
-        )]
-        fn _vaddvq_u8(a: uint8x16_t) -> u8;
-    }
-    unsafe { _vaddvq_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -748,14 +679,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u16(a: uint16x4_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v4i16"
-        )]
-        fn _vaddv_u16(a: uint16x4_t) -> u16;
-    }
-    unsafe { _vaddv_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -764,14 +688,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v8i16"
-        )]
-        fn _vaddvq_u16(a: uint16x8_t) -> u16;
-    }
-    unsafe { _vaddvq_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -780,14 +697,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
-        )]
-        fn _vaddvq_u32(a: uint32x4_t) -> u32;
-    }
-    unsafe { _vaddvq_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -796,14 +706,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_s64(a: int64x2_t) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
-        )]
-        fn _vaddvq_s64(a: int64x2_t) -> i64;
-    }
-    unsafe { _vaddvq_s64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -812,14 +715,7 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
-        )]
-        fn _vaddvq_u64(a: uint64x2_t) -> u64;
-    }
-    unsafe { _vaddvq_u64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Multi-vector floating-point absolute maximum"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
@@ -15951,23 +15847,11 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
-}
-#[doc = "Add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15976,7 +15860,7 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
-    vaddvq_u64(a)
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index b61d577777..4bf926c356 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -1483,15 +1483,11 @@ pub fn vabsq_f32(a: float32x4_t) -> float32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabs_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
-        fn _vabs_s8(a: int8x8_t) -> int8x8_t;
+    unsafe {
+        let neg: int8x8_t = simd_neg(a);
+        let mask: int8x8_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabs_s8(a) }
 }
 #[doc = "Absolute value (wrapping)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s8)"]
@@ -1512,15 +1508,11 @@ pub fn vabs_s8(a: int8x8_t) -> int8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabsq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
-        fn _vabsq_s8(a: int8x16_t) -> int8x16_t;
+    unsafe {
+        let neg: int8x16_t = simd_neg(a);
+        let mask: int8x16_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabsq_s8(a) }
 }
 #[doc = "Absolute value (wrapping)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s16)"]
@@ -1541,15 +1533,11 @@ pub fn vabsq_s8(a: int8x16_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabs_s16(a: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
-        fn _vabs_s16(a: int16x4_t) -> int16x4_t;
+    unsafe {
+        let neg: int16x4_t = simd_neg(a);
+        let mask: int16x4_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabs_s16(a) }
 }
 #[doc = "Absolute value (wrapping)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s16)"]
@@ -1570,15 +1558,11 @@ pub fn vabs_s16(a: int16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabsq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
-        fn _vabsq_s16(a: int16x8_t) -> int16x8_t;
+    unsafe {
+        let neg: int16x8_t = simd_neg(a);
+        let mask: int16x8_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabsq_s16(a) }
 }
 #[doc = "Absolute value (wrapping)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s32)"]
@@ -1599,15 +1583,11 @@ pub fn vabsq_s16(a: int16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabs_s32(a: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
-        fn _vabs_s32(a: int32x2_t) -> int32x2_t;
+    unsafe {
+        let neg: int32x2_t = simd_neg(a);
+        let mask: int32x2_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabs_s32(a) }
 }
 #[doc = "Absolute value (wrapping)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s32)"]
@@ -1628,15 +1608,11 @@ pub fn vabs_s32(a: int32x2_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vabsq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
-        fn _vabsq_s32(a: int32x4_t) -> int32x4_t;
+    unsafe {
+        let neg: int32x4_t = simd_neg(a);
+        let mask: int32x4_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabsq_s32(a) }
 }
 #[doc = "Floating-point absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsh_f16)"]
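Note: the `vabs*` bodies now build the absolute value from `simd_neg`, `simd_ge`, and `simd_select` rather than calling `llvm.{aarch64,arm}.neon.*abs*`. Per lane this matches `wrapping_abs`: the negation wraps, so the most negative value maps to itself, which is why the doc strings say "(wrapping)". A one-lane scalar sketch of the selected behavior (model only, assuming nothing beyond two's-complement `i8`):

```rust
/// One-lane model of `simd_select(simd_ge(a, neg), a, neg)` with
/// `neg = simd_neg(a)`, written for `i8`.
fn abs_select_model(a: i8) -> i8 {
    let neg = a.wrapping_neg(); // simd_neg wraps: -i8::MIN == i8::MIN
    if a >= neg { a } else { neg } // simd_select picks lane-wise by the mask
}

fn main() {
    for a in [0i8, 5, -5, i8::MAX, i8::MIN] {
        assert_eq!(abs_select_model(a), a.wrapping_abs());
    }
    // The edge case that makes the behavior "wrapping":
    assert_eq!(abs_select_model(i8::MIN), i8::MIN);
}
```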
doc: "Absolute Value (wrapping)." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [abs] + safety: safe + types: + - [i64, i64] + compose: + # This is behaviorally equivalent to `i64::wrapping_abs`, but keeps the value in a SIMD + # register. That can be beneficial when combined with other instructions. This LLVM + # issue provides some extra context https://github.com/llvm/llvm-project/issues/148388. + - LLVMLink: + name: "vabsd_s64" + links: + - link: "llvm.aarch64.neon.abs.i64" + arch: aarch64,arm64ec + - name: "{type[0]}" doc: "Absolute Value (wrapping)." arguments: ["a: {type[1]}"] @@ -13032,15 +13052,18 @@ intrinsics: assert_instr: [abs] safety: safe types: - - ['vabsd_s64', i64, i64] - ['vabs_s64', int64x1_t, v1i64] - ['vabsq_s64', int64x2_t, v2i64] compose: - - LLVMLink: - name: "{type[0]}" - links: - - link: "llvm.aarch64.neon.abs.{type[2]}" - arch: aarch64,arm64ec + - Let: + - neg + - "{type[1]}" + - FnCall: [simd_neg, [a]] + - Let: + - mask + - "{type[1]}" + - FnCall: [simd_ge, [a, neg]] + - FnCall: [simd_select, [mask, a, neg]] - name: "vuqadd{neon_type[0].no}" doc: "Signed saturating Accumulate of Unsigned value." @@ -13142,11 +13165,7 @@ intrinsics: types: - [int64x2_t, i64] compose: - - FnCall: - - transmute - - - FnCall: - - "vaddvq_u64" - - - FnCall: [transmute, [a]] + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vpaddd_u64" doc: "Add pairwise" @@ -13159,7 +13178,7 @@ intrinsics: types: - [uint64x2_t, u64] compose: - - FnCall: [vaddvq_u64, [a]] + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13176,11 +13195,7 @@ intrinsics: - [int16x8_t, i16] - [int32x4_t, i32] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.saddv.{type[1]}.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13193,11 +13208,7 @@ intrinsics: types: - [int32x2_t, i32] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13210,11 +13221,7 @@ intrinsics: types: - [int64x2_t, i64] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13231,11 +13238,7 @@ intrinsics: - [uint16x8_t, u16] - [uint32x4_t, u32] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.uaddv.{type[1]}.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13248,11 +13251,7 @@ intrinsics: types: - [uint32x2_t, u32, i32] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddv{neon_type[0].no}" doc: "Add across vector" @@ -13265,11 +13264,7 @@ intrinsics: types: - [uint64x2_t, u64, i64] compose: - - LLVMLink: - name: "vaddv{neon_type[0].no}" - links: - - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}" - arch: aarch64,arm64ec + - FnCall: [simd_reduce_add_unordered, [a]] - name: "vaddlv{neon_type[0].no}" doc: 
"Signed Add Long across Vector" diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index c836a47a74..58c26ae4f8 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -12861,13 +12861,16 @@ intrinsics: - int16x8_t - int32x4_t compose: - - LLVMLink: - name: "vabs{neon_type.no}" - links: - - link: "llvm.aarch64.neon.abs.{neon_type}" - arch: aarch64,arm64ec - - link: "llvm.arm.neon.vabs.{neon_type}" - arch: arm + - Let: + - neg + - "{neon_type}" + - FnCall: [simd_neg, [a]] + - Let: + - mask + - "{neon_type}" + - FnCall: [simd_ge, [a, neg]] + - FnCall: [simd_select, [mask, a, neg]] + - name: "vpmin{neon_type.no}" doc: "Folding minimum of adjacent pairs"