aarch64: use intrinsics::simd for horizontal add and abs #1872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 2 commits, Jul 15, 2025
178 changes: 31 additions & 147 deletions crates/core_arch/src/aarch64/neon/generated.rs
@@ -298,46 +298,40 @@ pub fn vabsq_f64(a: float64x2_t) -> float64x2_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(abs))]
pub fn vabs_s64(a: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v1i64"
-        )]
-        fn _vabs_s64(a: int64x1_t) -> int64x1_t;
+    unsafe {
+        let neg: int64x1_t = simd_neg(a);
+        let mask: int64x1_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
    }
-    unsafe { _vabs_s64(a) }
}
#[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
Comment on lines -301 to +308 (Contributor Author):

the diff here is wonky because the order of the definitions changed.

#[inline]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(abs))]
-pub fn vabsd_s64(a: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.i64"
-        )]
-        fn _vabsd_s64(a: i64) -> i64;
+pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+    unsafe {
+        let neg: int64x2_t = simd_neg(a);
+        let mask: int64x2_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
    }
-    unsafe { _vabsd_s64(a) }
}
#[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(abs))]
-pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+pub fn vabsd_s64(a: i64) -> i64 {
    unsafe extern "unadjusted" {
        #[cfg_attr(
            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v2i64"
+            link_name = "llvm.aarch64.neon.abs.i64"
        )]
-        fn _vabsq_s64(a: int64x2_t) -> int64x2_t;
+        fn _vabsd_s64(a: i64) -> i64;
    }
-    unsafe { _vabsq_s64(a) }
+    unsafe { _vabsd_s64(a) }
}
#[doc = "Add"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
@@ -604,14 +598,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vaddv_s32(a: int32x2_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
-        )]
-        fn _vaddv_s32(a: int32x2_t) -> i32;
-    }
-    unsafe { _vaddv_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -620,14 +607,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddv_s8(a: int8x8_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v8i8"
-        )]
-        fn _vaddv_s8(a: int8x8_t) -> i8;
-    }
-    unsafe { _vaddv_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -636,14 +616,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_s8(a: int8x16_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v16i8"
-        )]
-        fn _vaddvq_s8(a: int8x16_t) -> i8;
-    }
-    unsafe { _vaddvq_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -652,14 +625,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddv_s16(a: int16x4_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v4i16"
-        )]
-        fn _vaddv_s16(a: int16x4_t) -> i16;
-    }
-    unsafe { _vaddv_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -668,14 +634,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_s16(a: int16x8_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v8i16"
-        )]
-        fn _vaddvq_s16(a: int16x8_t) -> i16;
-    }
-    unsafe { _vaddvq_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -684,14 +643,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_s32(a: int32x4_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
-        )]
-        fn _vaddvq_s32(a: int32x4_t) -> i32;
-    }
-    unsafe { _vaddvq_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -700,14 +652,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vaddv_u32(a: uint32x2_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
-        )]
-        fn _vaddv_u32(a: uint32x2_t) -> u32;
-    }
-    unsafe { _vaddv_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -716,14 +661,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddv_u8(a: uint8x8_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v8i8"
-        )]
-        fn _vaddv_u8(a: uint8x8_t) -> u8;
-    }
-    unsafe { _vaddv_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -732,14 +670,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v16i8"
-        )]
-        fn _vaddvq_u8(a: uint8x16_t) -> u8;
-    }
-    unsafe { _vaddvq_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -748,14 +679,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddv_u16(a: uint16x4_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v4i16"
-        )]
-        fn _vaddv_u16(a: uint16x4_t) -> u16;
-    }
-    unsafe { _vaddv_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -764,14 +688,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v8i16"
-        )]
-        fn _vaddvq_u16(a: uint16x8_t) -> u16;
-    }
-    unsafe { _vaddvq_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -780,14 +697,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addv))]
pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
-        )]
-        fn _vaddvq_u32(a: uint32x4_t) -> u32;
-    }
-    unsafe { _vaddvq_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -796,14 +706,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vaddvq_s64(a: int64x2_t) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
-        )]
-        fn _vaddvq_s64(a: int64x2_t) -> i64;
-    }
-    unsafe { _vaddvq_s64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add across vector"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -812,14 +715,7 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
-        )]
-        fn _vaddvq_u64(a: uint64x2_t) -> u64;
-    }
-    unsafe { _vaddvq_u64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Multi-vector floating-point absolute maximum"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
@@ -15951,23 +15847,11 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
#[doc = "Add pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
#[inline]
-#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
-}
-#[doc = "Add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Add pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15976,7 +15860,7 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
-    vaddvq_u64(a)
+    unsafe { simd_reduce_add_unordered(a) }
}
#[doc = "Floating-point add pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]