diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index cba1593b..6a9b67d5 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -4,8 +4,8 @@ // This file is autogenerated by fearless_simd_gen use crate::{ - Bytes, Level, Select, SimdCvtFloat, SimdCvtTruncate, SimdElement, SimdFrom, SimdInto, - seal::Seal, + Bytes, Level, Select, SimdCvtFloat, SimdCvtTruncate, SimdElement, SimdFrom, SimdGather, + SimdInto, SimdScatter, seal::Seal, }; use crate::{ f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4, @@ -63,7 +63,9 @@ pub trait Simd: #[doc = r" A native-width SIMD vector of [`f64`]s."] type f64s: SimdFloat, Mask = Self::mask64s>; #[doc = r" A native-width SIMD vector of [`u8`]s."] - type u8s: SimdInt, Mask = Self::mask8s>; + type u8s: SimdInt, Mask = Self::mask8s> + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i8`]s."] type i8s: SimdInt< Self, @@ -73,7 +75,9 @@ pub trait Simd: Bytes = ::Bytes, > + core::ops::Neg; #[doc = r" A native-width SIMD vector of [`u16`]s."] - type u16s: SimdInt, Mask = Self::mask16s>; + type u16s: SimdInt, Mask = Self::mask16s> + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i16`]s."] type i16s: SimdInt< Self, @@ -84,7 +88,9 @@ pub trait Simd: > + core::ops::Neg; #[doc = r" A native-width SIMD vector of [`u32`]s."] type u32s: SimdInt, Mask = Self::mask32s> - + SimdCvtTruncate; + + SimdCvtTruncate + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i32`]s."] type i32s: SimdInt< Self, diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index e9f37172..0bab2e46 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -3,7 +3,10 @@ // This file is autogenerated by fearless_simd_gen -use crate::{Bytes, Select, Simd, SimdBase, SimdCvtFloat, SimdCvtTruncate, SimdFrom, SimdInto}; +use crate::{ + Bytes, Select, Simd, SimdBase, SimdCvtFloat, SimdCvtTruncate, SimdFrom, SimdGather, SimdInto, + SimdScatter, +}; #[doc = "A SIMD vector of 4 [`f32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f32x4};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f32x4::splat(simd, 1.0);\n let b = f32x4::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]);\n\n // From an array:\n let d = f32x4::simd_from(simd, [1.0, 2.0, 3.0, 4.0]);\n\n // From an element-wise function:\n let e = f32x4::from_fn(simd, |i| i as f32);\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(16))] @@ -544,6 +547,80 @@ impl crate::SimdInt for u8x16 { self.simd.max_u8x16(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u8x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdCombine for u8x16 { type Combined = u8x32; #[inline(always)] @@ -1008,6 +1085,80 @@ impl crate::SimdInt for u16x8 { self.simd.max_u16x8(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x8 { + type Gathered = [T; 8]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u16x8 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x8( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdCombine for u16x8 { type Combined = u16x16; #[inline(always)] @@ -1496,6 +1647,80 @@ impl SimdCvtTruncate> for u32x4 { x.simd.cvt_u32_precise_f32x4(x) } } +impl SimdGather for u32x4 { + type Gathered = [T; 4]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u32x4 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdCombine for u32x4 { type Combined = u32x8; #[inline(always)] @@ -2552,6 +2777,80 @@ impl crate::SimdInt for u8x32 { self.simd.max_u8x32(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x32 { + type Gathered = [T; 32]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u8x32 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u8x32 { type Split = u8x16; #[inline(always)] @@ -3047,6 +3346,80 @@ impl crate::SimdInt for u16x16 { self.simd.max_u16x16(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u16x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u16x16 { type Split = u16x8; #[inline(always)] @@ -3556,6 +3929,80 @@ impl SimdCvtTruncate> for u32x8 { x.simd.cvt_u32_precise_f32x8(x) } } +impl SimdGather for u32x8 { + type Gathered = [T; 8]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u32x8 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u32x8 { type Split = u32x4; #[inline(always)] @@ -4634,6 +5081,80 @@ impl crate::SimdInt for u8x64 { self.simd.max_u8x64(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x64 { + type Gathered = [T; 64]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u8x64 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u8x64 { type Split = u8x32; #[inline(always)] @@ -5111,6 +5632,80 @@ impl crate::SimdInt for u16x32 { self.simd.max_u16x32(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x32 { + type Gathered = [T; 32]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u16x32 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u16x32 { type Split = u16x16; #[inline(always)] @@ -5612,6 +6207,80 @@ impl SimdCvtTruncate> for u32x16 { x.simd.cvt_u32_precise_f32x16(x) } } +impl SimdGather for u32x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } +} +impl SimdScatter for u32x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } +} impl crate::SimdSplit for u32x16 { type Split = u32x8; #[inline(always)] diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs index 76e7039f..3eee4e35 100644 --- a/fearless_simd/src/traits.rs +++ b/fearless_simd/src/traits.rs @@ -5,6 +5,7 @@ missing_docs, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] + use crate::{Level, Simd, SimdBase}; /// Element-wise selection between two SIMD vectors using `self`. @@ -164,3 +165,49 @@ pub trait SimdSplit: SimdBase { /// Split this vector into left and right halves. fn split(self) -> (Self::Split, Self::Split); } + +/// Gathering of elements in a slice, treating each element in the vector as an index. Out-of-bounds +/// indices are clamped to the last element in the slice. +/// +/// Currently, this does not map to hardware "gather" instructions, but does allow you to avoid +/// bounds checks that the compiler is currently not capable of eliding. +pub trait SimdGather: SimdBase { + /// The type returned from [`SimdGather::gather`]. This will always be `[T; >::N]`, but associated constants are currently not powerful enough to express + /// that directly. + type Gathered; + + /// Gather elements from a slice, treating each element in this vector as an index. Returns an + /// array of gathered elements, with the same element count as the vector type. Out-of bounds + /// indices are clamped to the last element in the slice. + /// + /// Panics if the slice doesn't contain at least one element. + fn gather(self, src: &[T]) -> Self::Gathered; + /// Gather elements from a slice into another slice, treating each element in this vector as an + /// index. + /// + /// Unlike [`SimdGather::gather`], this is "length-erased", and can be used with the + /// native-width associated types on [`Simd`] (e.g. [`Simd::u32s`]). + /// + /// Panics if the slice doesn't contain at least one element, or if the destination slice + /// doesn't have the same element count as this vector. + fn gather_into(self, src: &[T], dst: &mut [T]); +} + +/// Scattering of elements into a slice, treating each element in the vector as an index to write +/// to. Out-of-bounds indices are clamped to the last element in the slice. If multiple indices are +/// identical, the order in which the writes occur is unspecified. +/// +/// Currently, this does not map to hardware "scatter" instructions, but does allow you to avoid +/// bounds checks that the compiler is currently not capable of eliding. +pub trait SimdScatter: SimdBase { + /// Scatter elements from one slice into another, treating each element in this vector as an + /// index into the destination slice. Out-of bounds indices are clamped to the last element in + /// the slice. + /// + /// Panics if the slice doesn't contain at least one element, or if the source slice doesn't + /// have the same element count as this vector. + /// + /// If multiple indices are identical, the order in which the writes occur is unspecified. + fn scatter(self, src: &[T], dst: &mut [T]); +} diff --git a/fearless_simd_dev_macros/src/lib.rs b/fearless_simd_dev_macros/src/lib.rs index 1cb2aa9c..3aa68cb3 100644 --- a/fearless_simd_dev_macros/src/lib.rs +++ b/fearless_simd_dev_macros/src/lib.rs @@ -23,28 +23,29 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let avx2_name = get_ident("avx2"); let wasm_name = get_ident("wasm"); - let ignore_attr = |f: fn(&str) -> bool| { - let should_ignore = input_fn - .attrs - .iter() - .any(|attr| attr.path().is_ident("ignore")) - || f(&input_fn_name.to_string()); - if should_ignore { - quote! { #[ignore] } - } else { - quote! {} + let test_attrs: Vec<_> = input_fn + .attrs + .iter() + .filter(|attr| !attr.path().is_ident("simd_test")) + .collect(); + + // If this is a `#[should_panic]` test, run it with the fallback so it actually panics + let should_panic_attr = input_fn + .attrs + .iter() + .find(|attr| attr.path().is_ident("should_panic")); + let panic_else = if should_panic_attr.is_some() { + quote! { + let fallback = fearless_simd::Fallback::new(); + #input_fn_name(fallback); } + } else { + quote! {} }; - let ignore_fallback = ignore_attr(exclude_fallback); - let ignore_neon = ignore_attr(exclude_neon); - let ignore_sse4 = ignore_attr(exclude_sse4); - let ignore_avx2 = ignore_attr(exclude_avx2); - let ignore_wasm = ignore_attr(exclude_wasm); - let fallback_snippet = quote! { + #(#test_attrs)* #[test] - #ignore_fallback fn #fallback_name() { let fallback = fearless_simd::Fallback::new(); #input_fn_name(fallback); @@ -60,49 +61,58 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { // target features aren't supported. This is not ideal, since it may mislead you into thinking tests have passed // when they haven't even been run, but some CI runners don't support all target features and we don't want failures // as a result of that. + // + // However, for #[should_panic] tests, we need to panic if features aren't available to avoid + // "test did not panic as expected" failures. let neon_snippet = quote! { #[cfg(target_arch = "aarch64")] + #(#test_attrs)* #[test] - #ignore_neon fn #neon_name() { if std::arch::is_aarch64_feature_detected!("neon") { let neon = unsafe { fearless_simd::aarch64::Neon::new_unchecked() }; #input_fn_name(neon); + } else { + #panic_else } } }; let sse4_snippet = quote! { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #(#test_attrs)* #[test] - #ignore_sse4 fn #sse4_name() { if std::arch::is_x86_feature_detected!("sse4.2") { let sse4 = unsafe { fearless_simd::x86::Sse4_2::new_unchecked() }; #input_fn_name(sse4); + } else { + #panic_else } } }; let avx2_snippet = quote! { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #(#test_attrs)* #[test] - #ignore_avx2 fn #avx2_name() { if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") { let avx2 = unsafe { fearless_simd::x86::Avx2::new_unchecked() }; #input_fn_name(avx2); + } else { + #panic_else } } }; let wasm_snippet = quote! { #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + #(#test_attrs)* #[test] - #ignore_wasm fn #wasm_name() { let wasm = unsafe { fearless_simd::wasm32::WasmSimd128::new_unchecked() }; #input_fn_name(wasm); @@ -120,26 +130,3 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { } .into() } - -// You can update below functions if you want to exclude certain tests from different architectures -// (for example because they haven't been implemented yet). - -fn exclude_neon(_test_name: &str) -> bool { - false -} - -fn exclude_fallback(_test_name: &str) -> bool { - false -} - -fn exclude_sse4(_test_name: &str) -> bool { - false -} - -fn exclude_avx2(_test_name: &str) -> bool { - false -} - -fn exclude_wasm(_test_name: &str) -> bool { - false -} diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 1e9c39ad..ec4a8c6f 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -24,7 +24,7 @@ pub(crate) fn mk_simd_trait() -> TokenStream { } } let mut code = quote! { - use crate::{seal::Seal, Level, SimdElement, SimdFrom, SimdInto, SimdCvtTruncate, SimdCvtFloat, Select, Bytes}; + use crate::{seal::Seal, Level, SimdElement, SimdFrom, SimdInto, SimdCvtTruncate, SimdCvtFloat, SimdGather, SimdScatter, Select, Bytes}; #imports /// The main SIMD trait, implemented by all SIMD token types. /// @@ -67,15 +67,15 @@ pub(crate) fn mk_simd_trait() -> TokenStream { /// A native-width SIMD vector of [`f64`]s. type f64s: SimdFloat, Mask = Self::mask64s>; /// A native-width SIMD vector of [`u8`]s. - type u8s: SimdInt, Mask = Self::mask8s>; + type u8s: SimdInt, Mask = Self::mask8s> + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i8`]s. type i8s: SimdInt, Mask = Self::mask8s, Bytes = ::Bytes> + core::ops::Neg; /// A native-width SIMD vector of [`u16`]s. - type u16s: SimdInt, Mask = Self::mask16s>; + type u16s: SimdInt, Mask = Self::mask16s> + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i16`]s. type i16s: SimdInt, Mask = Self::mask16s, Bytes = ::Bytes> + core::ops::Neg; /// A native-width SIMD vector of [`u32`]s. - type u32s: SimdInt, Mask = Self::mask32s> + SimdCvtTruncate; + type u32s: SimdInt, Mask = Self::mask32s> + SimdCvtTruncate + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i32`]s. type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index a1be080d..d3ba52e0 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -15,7 +15,7 @@ use crate::{ pub(crate) fn mk_simd_types() -> TokenStream { let mut result = quote! { - use crate::{Bytes, Select, Simd, SimdBase, SimdFrom, SimdInto, SimdCvtFloat, SimdCvtTruncate}; + use crate::{Bytes, Select, Simd, SimdBase, SimdFrom, SimdInto, SimdGather, SimdScatter, SimdCvtFloat, SimdCvtTruncate}; }; for ty in SIMD_TYPES { let name = ty.rust(); @@ -63,6 +63,8 @@ pub(crate) fn mk_simd_types() -> TokenStream { }; let impl_block = simd_vec_impl(ty); let mut conditional_impls = Vec::new(); + + // Conversion operations // TODO: Relax `if` clauses once 64-bit integer or 16-bit floats vectors are implemented match ty.scalar { ScalarType::Float if ty.scalar_bits == 32 => { @@ -133,6 +135,100 @@ pub(crate) fn mk_simd_types() -> TokenStream { } _ => {} } + + // Scatter/gather operations + if ty.scalar == ScalarType::Unsigned { + let min_method = generic_op_name("min", ty); + conditional_impls.push(quote! { + impl SimdGather for #name { + type Gathered = [T; #len]; + + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + + // Check if the element type is small enough that the slice's length could (and then does) + // exceed its maximum value. The `size_of` check ensures that `Self::Element::MAX as usize` will + // never truncate/wrap. + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && + src.len() > Self::Element::MAX as usize + { + // The slice is big enough to accept any index. For instance, if this is a vector of `u8`s, + // `Self::Element::MAX` is 255, so the slice must be at least 256 elements long. + self + } else { + // No `max(0)`; we do not implement `SimdGather` for signed integers. + // + // Converting `src.len() - 1` to `Self::Element` will not wrap, because if `src.len() - 1 >= + // Self::Element::MAX`, that means that `src.len() > Self::Element::MAX`, and we take the + // above branch instead. + self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + core::array::from_fn(|i| unsafe { + // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because we + // asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` will be + // between [0, Self::N), so the index into `inbounds` is valid. + *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + }) + } + + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!(Self::N, dst.len(), "gather_into: destination slice must have the same element count as the vector type"); + assert!(!src.is_empty(), "gather_into: source slice must not be empty"); + + // Same logic as for `gather`. See the comments there. + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && + src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + for i in 0..Self::N { + unsafe { + // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because + // we asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` + // will be between [0, Self::N), so the index into `inbounds` is valid. The index into + // `dst` is also valid, since we asserted above that `dst.len() == Self::N`. + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); + } + } + } + } + + impl SimdScatter for #name { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!(Self::N, src.len(), "scatter: source slice must have the same element count as the vector type"); + assert!(!dst.is_empty(), "scatter: destination slice must not be empty"); + + // Same logic as for `gather`, but for `dst`. See the comments there. + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && + dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.#min_method(self, ((dst.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + for i in 0..Self::N { + unsafe { + // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because + // we asserted that `dst` is not empty. Therefore, the index into `dst` is valid. `i` + // will be between [0, Self::N), so the index into `inbounds` is valid. The index into + // `src` is also valid, since we asserted above that `src.len() == Self::N`. + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } + } + } + } + }); + } + + // Split/combine operations if let Some(half_ty) = ty.split_operand() { let half_ty_rust = half_ty.rust(); let split_method = generic_op_name("split", ty); @@ -161,6 +257,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { } }); } + result.extend(quote! { #[doc = #doc] #[derive(Clone, Copy)] diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 233a16be..c033d19b 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -2984,3 +2984,364 @@ fn index_consistency(simd: S) { assert_eq!(i, *v.index_mut(i) as usize); } } + +#[simd_test] +fn gather_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15], + ); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 30, 50, 70, 90, 110, 130, 150, 20, 40, 60, 80, 100, 120, 140, 160 + ] + ); +} + +#[simd_test] +fn gather_u16x8_basic(simd: S) { + let indices = u16x8::from_slice(simd, &[7, 6, 5, 4, 3, 2, 1, 0]); + let src = [100, 200, 300, 400, 500, 600, 700, 800]; + let result = indices.gather(&src); + assert_eq!(result, [800, 700, 600, 500, 400, 300, 200, 100]); +} + +#[simd_test] +fn gather_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[3, 1, 2, 0]); + let src = [1000, 2000, 3000, 4000]; + let result = indices.gather(&src); + assert_eq!(result, [4000, 2000, 3000, 1000]); +} + +#[simd_test] +fn gather_with_duplicate_indices(simd: S) { + let indices = u8x16::from_slice(simd, &[0, 0, 1, 1, 2, 2, 3, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + let src = [10, 20, 30, 40]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 10, 20, 20, 30, 30, 40, 40, 10, 20, 30, 40, 10, 20, 30, 40 + ] + ); +} + +#[simd_test] +fn gather_out_of_bounds_clamping(simd: S) { + // Indices that are out of bounds should be clamped to the last element + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 100, 200, 255, 3, 4, 0, 50, 99, 1, 2, 3, 4, 150], + ); + let src = [10, 20, 30, 40, 50]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 20, 30, 50, 50, 50, 40, 50, 10, 50, 50, 20, 30, 40, 50, 50 + ] + ); +} + +#[simd_test] +fn gather_u32x4_out_of_bounds(simd: S) { + let indices = u32x4::from_slice(simd, &[0, 1000, u32::MAX, 2]); + let src = [100, 200, 300]; + let result = indices.gather(&src); + assert_eq!(result, [100, 300, 300, 300]); +} + +#[simd_test] +fn gather_single_element_source(simd: S) { + // All indices should point to the single element + let indices = u8x16::from_slice( + simd, + &[0, 5, 10, 255, 100, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + let src = [42]; + let result = indices.gather(&src); + assert_eq!(result, [42; 16]); +} + +// Note that the #[should_panic] tests are automatically skipped on wasm32-wasip1 with the default runner, which has +// panic=abort. cargo-nextest appears to support these, since it runs each test in its own process. +#[simd_test] +#[should_panic(expected = "gather: source slice must not be empty")] +fn gather_empty_source_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src: [i32; 0] = []; + let _result = indices.gather(&src); +} + +#[simd_test] +fn gather_into_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + let src = [100; 20]; + let mut dst = [0; 16]; + indices.gather_into(&src, &mut dst); + assert_eq!(dst, [100; 16]); +} + +#[simd_test] +fn gather_into_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let src = [10, 20, 30, 40]; + let mut dst = [0; 4]; + indices.gather_into(&src, &mut dst); + assert_eq!(dst, [30, 10, 40, 20]); +} + +#[simd_test] +fn gather_into_with_clamping(simd: S) { + let indices = u16x8::from_slice(simd, &[0, 1, 2, 100, 200, 500, 1000, u16::MAX]); + let src = [5, 10, 15]; + let mut dst = [0; 8]; + indices.gather_into(&src, &mut dst); + // All out-of-bounds indices should clamp to index 2 + assert_eq!(dst, [5, 10, 15, 15, 15, 15, 15, 15]); +} + +#[simd_test] +#[should_panic(expected = "gather_into: source slice must not be empty")] +fn gather_into_empty_source_panics(simd: S) { + let indices = u32x4::splat(simd, 0); + let src: [i32; 0] = []; + let mut dst = [0; 4]; + indices.gather_into(&src, &mut dst); +} + +#[simd_test] +#[should_panic( + expected = "gather_into: destination slice must have the same element count as the vector type" +)] +fn gather_into_wrong_dst_size_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [1, 2, 3]; + let mut dst = [0; 8]; // Should be 16 + indices.gather_into(&src, &mut dst); +} + +#[simd_test] +fn scatter_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let mut dst = [0; 16]; + indices.scatter(&src, &mut dst); + assert_eq!(dst, src); +} + +#[simd_test] +fn scatter_u16x8_basic(simd: S) { + let indices = u16x8::from_slice(simd, &[7, 6, 5, 4, 3, 2, 1, 0]); + let src = [100, 200, 300, 400, 500, 600, 700, 800]; + let mut dst = [0; 8]; + indices.scatter(&src, &mut dst); + assert_eq!(dst, [800, 700, 600, 500, 400, 300, 200, 100]); +} + +#[simd_test] +fn scatter_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let src = [10, 20, 30, 40]; + let mut dst = [0; 4]; + indices.scatter(&src, &mut dst); + assert_eq!(dst, [20, 40, 10, 30]); +} + +#[simd_test] +fn scatter_with_duplicate_indices(simd: S) { + // When multiple indices point to the same location, one of them will win + // The behavior is unspecified, but all should be valid values from src + let indices = u8x16::from_slice(simd, &[0, 0, 1, 1, 2, 2, 3, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let mut dst = [0; 4]; + indices.scatter(&src, &mut dst); + + assert!([10, 20, 90, 130].contains(&dst[0])); + assert!([30, 40, 100, 140].contains(&dst[1])); + assert!([50, 60, 110, 150].contains(&dst[2])); + assert!([70, 80, 120, 160].contains(&dst[3])); +} + +#[simd_test] +fn scatter_out_of_bounds_clamping(simd: S) { + // Out of bounds indices should be clamped to the last element + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 100, 200, 255, 3, 4, 0, 50, 99, 1, 2, 3, 4, 150], + ); + let src = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut dst = [0; 5]; + indices.scatter(&src, &mut dst); + + assert!([1, 9].contains(&dst[0])); + assert!([2, 12].contains(&dst[1])); + assert!([3, 13].contains(&dst[2])); + assert!([7, 14].contains(&dst[3])); + assert!([5, 6, 8, 10, 11, 15, 16].contains(&dst[4])); +} + +#[simd_test] +fn scatter_single_element_destination(simd: S) { + // All indices should be clamped to 0, so all writes go to the same location + let indices = u8x16::from_slice( + simd, + &[0, 5, 10, 255, 100, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + let src = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut dst = [0]; + indices.scatter(&src, &mut dst); + + assert!((1..=16).contains(&dst[0])); +} + +#[simd_test] +fn scatter_u32x4_out_of_bounds(simd: S) { + let indices = u32x4::from_slice(simd, &[0, 1000, u32::MAX, 2]); + let src = [100, 200, 300, 400]; + let mut dst = [0; 3]; + indices.scatter(&src, &mut dst); + + assert_eq!(dst[0], 100); + assert!([200, 300, 400].contains(&dst[2])); +} + +#[simd_test] +#[should_panic(expected = "scatter: destination slice must not be empty")] +fn scatter_empty_destination_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [0; 16]; + let mut dst: [i32; 0] = []; + indices.scatter(&src, &mut dst); +} + +#[simd_test] +#[should_panic( + expected = "scatter: source slice must have the same element count as the vector type" +)] +fn scatter_wrong_src_size_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [1, 2, 3]; // Should be 16 + let mut dst = [0; 10]; + indices.scatter(&src, &mut dst); +} + +// ===== Additional edge case tests ===== + +#[simd_test] +fn gather_scatter_roundtrip(simd: S) { + // Test that gather followed by scatter with the same indices preserves data + let indices = u32x4::from_slice(simd, &[3, 1, 2, 0]); + let original = [100, 200, 300, 400]; + + let gathered = indices.gather(&original); + assert_eq!(gathered, [400, 200, 300, 100]); + + let mut result = [0; 4]; + indices.scatter(&gathered, &mut result); + assert_eq!(result, original); +} + +#[simd_test] +fn gather_u16x16_native_width(simd: S) { + let data: Vec = (0..100).collect(); + let indices = S::u16s::from_slice(simd, &vec![5_u16; S::u16s::N]); + + let mut result = vec![0_u32; S::u16s::N]; + indices.gather_into(&data, &mut result); + assert_eq!(result, vec![5_u32; S::u16s::N]); +} + +#[simd_test] +fn scatter_u32_native_width(simd: S) { + let src = vec![42_u64; S::u32s::N]; + let mut dst = vec![0_u64; 100]; + + let indices = S::u32s::from_slice(simd, &vec![10_u32; S::u32s::N]); + indices.scatter(&src, &mut dst); + + for (i, item) in dst.iter().enumerate() { + if i == 10 { + assert_eq!(*item, 42); + } else { + assert_eq!(*item, 0); + } + } +} + +#[simd_test] +fn gather_with_large_type(simd: S) { + #[derive(Debug, Clone, Copy, PartialEq)] + struct LargeStruct { + a: u64, + b: u64, + c: u64, + } + + let src = [ + LargeStruct { a: 1, b: 2, c: 3 }, + LargeStruct { a: 4, b: 5, c: 6 }, + LargeStruct { a: 7, b: 8, c: 9 }, + LargeStruct { + a: 10, + b: 11, + c: 12, + }, + ]; + + let indices = u32x4::from_slice(simd, &[3, 0, 2, 1]); + let result = indices.gather(&src); + + assert_eq!( + result[0], + LargeStruct { + a: 10, + b: 11, + c: 12 + } + ); + assert_eq!(result[1], LargeStruct { a: 1, b: 2, c: 3 }); + assert_eq!(result[2], LargeStruct { a: 7, b: 8, c: 9 }); + assert_eq!(result[3], LargeStruct { a: 4, b: 5, c: 6 }); +} + +#[simd_test] +fn scatter_with_large_type(simd: S) { + #[derive(Debug, Clone, Copy, PartialEq)] + struct LargeStruct { + a: u64, + b: u64, + } + + let src = [ + LargeStruct { a: 1, b: 2 }, + LargeStruct { a: 3, b: 4 }, + LargeStruct { a: 5, b: 6 }, + LargeStruct { a: 7, b: 8 }, + ]; + + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let mut dst = [LargeStruct { a: 0, b: 0 }; 4]; + indices.scatter(&src, &mut dst); + + assert_eq!(dst[0], LargeStruct { a: 3, b: 4 }); + assert_eq!(dst[1], LargeStruct { a: 7, b: 8 }); + assert_eq!(dst[2], LargeStruct { a: 1, b: 2 }); + assert_eq!(dst[3], LargeStruct { a: 5, b: 6 }); +}