Skip to content
9 changes: 4 additions & 5 deletions fearless_simd/src/generated/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

// This file is autogenerated by fearless_simd_gen

use crate::{Level, Simd, SimdFrom, SimdInto, arch_types::ArchTypes, seal::Seal};
use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
Expand All @@ -14,8 +14,7 @@ use crate::{
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::ops::*;
#[doc = r#" The SIMD token for the "AVX2" and "FMA" level."#]
#[doc = "The SIMD token for the \"AVX2\" and \"FMA\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Avx2 {
pub avx2: crate::core_arch::x86::Avx2,
Expand All @@ -25,10 +24,10 @@ impl Avx2 {
#[doc = r""]
#[doc = r" # Safety"]
#[doc = r""]
#[doc = r" The AVX2 and FMA CPU feature must be available."]
#[doc = r" The AVX2 and FMA CPU features must be available."]
#[inline]
pub const unsafe fn new_unchecked() -> Self {
Avx2 {
Self {
avx2: unsafe { crate::core_arch::x86::Avx2::new_unchecked() },
}
}
Expand Down
6 changes: 3 additions & 3 deletions fearless_simd/src/generated/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

// This file is autogenerated by fearless_simd_gen

use crate::{Bytes, Level, Simd, SimdInto, arch_types::ArchTypes, seal::Seal};
use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
Expand Down Expand Up @@ -74,15 +74,15 @@ impl FloatExt for f64 {
libm::trunc(self)
}
}
#[doc = r#" The SIMD token for the "fallback" level."#]
#[doc = "The SIMD token for the \"fallback\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Fallback {
pub fallback: crate::core_arch::fallback::Fallback,
}
impl Fallback {
#[inline]
pub const fn new() -> Self {
Fallback {
Self {
fallback: crate::core_arch::fallback::Fallback::new(),
}
}
Expand Down
80 changes: 40 additions & 40 deletions fearless_simd/src/generated/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

// This file is autogenerated by fearless_simd_gen

use crate::{Level, Simd, SimdFrom, SimdInto, arch_types::ArchTypes, seal::Seal};
use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
u32x4, u32x8, u32x16,
};
use core::arch::aarch64::*;
#[doc = r#" The SIMD token for the "neon" level."#]
#[doc = "The SIMD token for the \"neon\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Neon {
pub neon: crate::core_arch::aarch64::Neon,
Expand All @@ -25,6 +25,44 @@ impl Neon {
}
}
impl Seal for Neon {}
// Associates each generic vector type name with the storage type used at the
// `Neon` level. Every entry is an alignment wrapper from `crate::support`
// around a NEON intrinsic type:
//   * 128-bit vectors use `Aligned128` around a single vector type,
//   * 256-bit vectors use `Aligned256` around the matching `x2` type,
//   * 512-bit vectors use `Aligned512` around the matching `x4` type.
// Mask types reuse the signed-integer intrinsic type of the same lane width.
impl ArchTypes for Neon {
// 128-bit vectors.
type f32x4 = crate::support::Aligned128<float32x4_t>;
type i8x16 = crate::support::Aligned128<int8x16_t>;
type u8x16 = crate::support::Aligned128<uint8x16_t>;
type mask8x16 = crate::support::Aligned128<int8x16_t>;
type i16x8 = crate::support::Aligned128<int16x8_t>;
type u16x8 = crate::support::Aligned128<uint16x8_t>;
type mask16x8 = crate::support::Aligned128<int16x8_t>;
type i32x4 = crate::support::Aligned128<int32x4_t>;
type u32x4 = crate::support::Aligned128<uint32x4_t>;
type mask32x4 = crate::support::Aligned128<int32x4_t>;
type f64x2 = crate::support::Aligned128<float64x2_t>;
type mask64x2 = crate::support::Aligned128<int64x2_t>;
// 256-bit vectors, stored as the `x2` intrinsic types.
type f32x8 = crate::support::Aligned256<float32x4x2_t>;
type i8x32 = crate::support::Aligned256<int8x16x2_t>;
type u8x32 = crate::support::Aligned256<uint8x16x2_t>;
type mask8x32 = crate::support::Aligned256<int8x16x2_t>;
type i16x16 = crate::support::Aligned256<int16x8x2_t>;
type u16x16 = crate::support::Aligned256<uint16x8x2_t>;
type mask16x16 = crate::support::Aligned256<int16x8x2_t>;
type i32x8 = crate::support::Aligned256<int32x4x2_t>;
type u32x8 = crate::support::Aligned256<uint32x4x2_t>;
type mask32x8 = crate::support::Aligned256<int32x4x2_t>;
type f64x4 = crate::support::Aligned256<float64x2x2_t>;
type mask64x4 = crate::support::Aligned256<int64x2x2_t>;
// 512-bit vectors, stored as the `x4` intrinsic types.
type f32x16 = crate::support::Aligned512<float32x4x4_t>;
type i8x64 = crate::support::Aligned512<int8x16x4_t>;
type u8x64 = crate::support::Aligned512<uint8x16x4_t>;
type mask8x64 = crate::support::Aligned512<int8x16x4_t>;
type i16x32 = crate::support::Aligned512<int16x8x4_t>;
type u16x32 = crate::support::Aligned512<uint16x8x4_t>;
type mask16x32 = crate::support::Aligned512<int16x8x4_t>;
type i32x16 = crate::support::Aligned512<int32x4x4_t>;
type u32x16 = crate::support::Aligned512<uint32x4x4_t>;
type mask32x16 = crate::support::Aligned512<int32x4x4_t>;
type f64x8 = crate::support::Aligned512<float64x2x4_t>;
type mask64x8 = crate::support::Aligned512<int64x2x4_t>;
}
impl Simd for Neon {
type f32s = f32x4<Self>;
type f64s = f64x2<Self>;
Expand Down Expand Up @@ -6712,44 +6750,6 @@ impl Simd for Neon {
)
}
}
// Associates each generic vector type name with the storage type used at the
// `Neon` level. Every entry is an alignment wrapper from `crate::support`
// around a NEON intrinsic type:
//   * 128-bit vectors use `Aligned128` around a single vector type,
//   * 256-bit vectors use `Aligned256` around the matching `x2` type,
//   * 512-bit vectors use `Aligned512` around the matching `x4` type.
// Mask types reuse the signed-integer intrinsic type of the same lane width.
impl ArchTypes for Neon {
// 128-bit vectors.
type f32x4 = crate::support::Aligned128<float32x4_t>;
type i8x16 = crate::support::Aligned128<int8x16_t>;
type u8x16 = crate::support::Aligned128<uint8x16_t>;
type mask8x16 = crate::support::Aligned128<int8x16_t>;
type i16x8 = crate::support::Aligned128<int16x8_t>;
type u16x8 = crate::support::Aligned128<uint16x8_t>;
type mask16x8 = crate::support::Aligned128<int16x8_t>;
type i32x4 = crate::support::Aligned128<int32x4_t>;
type u32x4 = crate::support::Aligned128<uint32x4_t>;
type mask32x4 = crate::support::Aligned128<int32x4_t>;
type f64x2 = crate::support::Aligned128<float64x2_t>;
type mask64x2 = crate::support::Aligned128<int64x2_t>;
// 256-bit vectors, stored as the `x2` intrinsic types.
type f32x8 = crate::support::Aligned256<float32x4x2_t>;
type i8x32 = crate::support::Aligned256<int8x16x2_t>;
type u8x32 = crate::support::Aligned256<uint8x16x2_t>;
type mask8x32 = crate::support::Aligned256<int8x16x2_t>;
type i16x16 = crate::support::Aligned256<int16x8x2_t>;
type u16x16 = crate::support::Aligned256<uint16x8x2_t>;
type mask16x16 = crate::support::Aligned256<int16x8x2_t>;
type i32x8 = crate::support::Aligned256<int32x4x2_t>;
type u32x8 = crate::support::Aligned256<uint32x4x2_t>;
type mask32x8 = crate::support::Aligned256<int32x4x2_t>;
type f64x4 = crate::support::Aligned256<float64x2x2_t>;
type mask64x4 = crate::support::Aligned256<int64x2x2_t>;
// 512-bit vectors, stored as the `x4` intrinsic types.
type f32x16 = crate::support::Aligned512<float32x4x4_t>;
type i8x64 = crate::support::Aligned512<int8x16x4_t>;
type u8x64 = crate::support::Aligned512<uint8x16x4_t>;
type mask8x64 = crate::support::Aligned512<int8x16x4_t>;
type i16x32 = crate::support::Aligned512<int16x8x4_t>;
type u16x32 = crate::support::Aligned512<uint16x8x4_t>;
type mask16x32 = crate::support::Aligned512<int16x8x4_t>;
type i32x16 = crate::support::Aligned512<int32x4x4_t>;
type u32x16 = crate::support::Aligned512<uint32x4x4_t>;
type mask32x16 = crate::support::Aligned512<int32x4x4_t>;
type f64x8 = crate::support::Aligned512<float64x2x4_t>;
type mask64x8 = crate::support::Aligned512<int64x2x4_t>;
}
impl<S: Simd> SimdFrom<float32x4_t, S> for f32x4<S> {
#[inline(always)]
fn simd_from(arch: float32x4_t, simd: S) -> Self {
Expand Down
5 changes: 2 additions & 3 deletions fearless_simd/src/generated/sse4_2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

// This file is autogenerated by fearless_simd_gen

use crate::{Level, Simd, SimdFrom, SimdInto, arch_types::ArchTypes, seal::Seal};
use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
Expand All @@ -14,8 +14,7 @@ use crate::{
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::ops::*;
#[doc = r#" The SIMD token for the "SSE 4.2" level."#]
#[doc = "The SIMD token for the \"SSE4.2\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Sse4_2 {
pub sse4_2: crate::core_arch::x86::Sse4_2,
Expand Down
10 changes: 3 additions & 7 deletions fearless_simd/src/generated/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

// This file is autogenerated by fearless_simd_gen

use crate::{Level, Simd, SimdFrom, SimdInto, arch_types::ArchTypes, seal::Seal};
use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
u32x4, u32x8, u32x16,
};
use core::arch::wasm32::*;
#[doc = r#" The SIMD token for the "wasm128" level."#]
#[doc = "The SIMD token for the \"wasm128\" level."]
#[derive(Clone, Copy, Debug)]
pub struct WasmSimd128 {
pub wasmsimd128: crate::core_arch::wasm32::WasmSimd128,
Expand Down Expand Up @@ -82,11 +82,7 @@ impl Simd for WasmSimd128 {
}
#[inline]
fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
#[inline]
unsafe fn vectorize_simd128<F: FnOnce() -> R, R>(f: F) -> R {
f()
}
unsafe { vectorize_simd128(f) }
f()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is possible because it's enabled statically, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes; I left a comment on this in mk_wasm.

}
#[inline(always)]
fn splat_f32x4(self, val: f32) -> f32x4<Self> {
Expand Down
12 changes: 1 addition & 11 deletions fearless_simd_gen/src/arch/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,22 +60,12 @@ pub(crate) fn translate_op(op: &str, is_float: bool) -> Option<&'static str> {
}

pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream {
let ty_prefix = arch_ty(ty);
let ty_prefix = ty.scalar.rust(ty.scalar_bits);
let ident = Ident::new(name, Span::call_site());

quote! {#ty_prefix::#ident}
}

/// Builds the identifier of the scalar type used for `ty`'s lanes.
///
/// The identifier is a one-letter prefix followed by `ty.scalar_bits`:
/// `f` for floats, `u` for unsigned integers, and `i` for both signed
/// integers and masks.
pub(crate) fn arch_ty(ty: &VecType) -> Ident {
    let prefix = match ty.scalar {
        ScalarType::Float => 'f',
        ScalarType::Unsigned => 'u',
        // Masks share the signed-integer spelling.
        ScalarType::Int | ScalarType::Mask => 'i',
    };
    Ident::new(&format!("{prefix}{}", ty.scalar_bits), Span::call_site())
}

pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream {
let Some(translated) = translate_op(op, ty.scalar == ScalarType::Float) else {
unimplemented!("missing {op}");
Expand Down
16 changes: 0 additions & 16 deletions fearless_simd_gen/src/arch/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,6 @@ fn translate_op(op: &str) -> Option<&'static str> {
})
}

/// Builds the identifier of the NEON intrinsic type that stores `ty`.
///
/// The name has the form `<family><lane bits>x<lanes>[x2|x4]_t`, where
/// `<family>` is `float`, `uint`, or `int` (masks use `int`). Vectors of
/// 256 bits use the `x2` form with half the lane count, vectors of 512 bits
/// use the `x4` form with a quarter of the lane count, and every other width
/// uses the plain form with the full lane count.
pub(crate) fn arch_ty(ty: &VecType) -> Ident {
    let family = match ty.scalar {
        ScalarType::Float => "float",
        ScalarType::Unsigned => "uint",
        ScalarType::Int | ScalarType::Mask => "int",
    };
    let lane_bits = ty.scalar_bits;
    let type_name = match ty.n_bits() {
        256 => format!("{family}{lane_bits}x{}x2_t", ty.len / 2),
        512 => format!("{family}{lane_bits}x{}x4_t", ty.len / 4),
        _ => format!("{family}{lane_bits}x{}_t", ty.len),
    };
    Ident::new(&type_name, Span::call_site())
}

// expects args and return value in arch dialect
pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream {
// There is no logical NOT for 64-bit, so we need this workaround.
Expand Down
4 changes: 2 additions & 2 deletions fearless_simd_gen/src/arch/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn translate_op(op: &str) -> Option<&'static str> {
}

pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> Ident {
let ty_prefix = arch_ty(ty);
let ty_prefix = arch_prefix(ty);
let ident = Ident::new(name, Span::call_site());
Ident::new(&format!("{}_{}", ty_prefix, ident), Span::call_site())
}
Expand All @@ -48,7 +48,7 @@ pub(crate) fn v128_intrinsic(name: &str) -> Ident {
Ident::new(&format!("{}_{}", ty_prefix, ident), Span::call_site())
}

pub(crate) fn arch_ty(ty: &VecType) -> Ident {
pub(crate) fn arch_prefix(ty: &VecType) -> Ident {
let scalar = match ty.scalar {
ScalarType::Float => "f",
ScalarType::Unsigned => "u",
Expand Down
37 changes: 6 additions & 31 deletions fearless_simd_gen/src/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT

use proc_macro2::{Ident, Span, TokenStream};
use quote::quote;
use quote::{ToTokens, quote};

use crate::{
ops::{Op, OpSig, RefKind},
types::{SIMD_TYPES, ScalarType, VecType},
types::{ScalarType, VecType},
};

pub(crate) fn generic_op_name(op: &str, ty: &VecType) -> Ident {
Expand All @@ -22,10 +22,6 @@ pub(crate) fn generic_op(op: &Op, ty: &VecType) -> TokenStream {
let combine = generic_op_name("combine", &half);
let do_half = generic_op_name(op.method, &half);
let method_sig = op.simd_trait_method_sig(ty);
let method_sig = quote! {
#[inline(always)]
#method_sig
};
match op.sig {
OpSig::Splat => {
quote! {
Expand Down Expand Up @@ -306,18 +302,19 @@ pub(crate) fn generic_from_array(
}
}

pub(crate) fn generic_as_array(
pub(crate) fn generic_as_array<T: ToTokens>(
method_sig: TokenStream,
vec_ty: &VecType,
kind: RefKind,
max_block_size: usize,
arch_ty: impl Fn(&VecType) -> Ident,
arch_ty: impl Fn(&VecType) -> T,
) -> TokenStream {
let rust_scalar = vec_ty.scalar.rust(vec_ty.scalar_bits);
let num_scalars = vec_ty.len;

let ref_tok = kind.token();
let native_ty = vec_ty.wrapped_native_ty(arch_ty, max_block_size);
let native_ty =
vec_ty.wrapped_native_ty(|vec_ty| arch_ty(vec_ty).into_token_stream(), max_block_size);

quote! {
#method_sig {
Expand Down Expand Up @@ -358,25 +355,3 @@ pub(crate) fn generic_from_bytes(method_sig: TokenStream, vec_ty: &VecType) -> T
}
}
}

/// Generates the `impl ArchTypes for <level_name>` block.
///
/// One associated type is emitted per entry of `SIMD_TYPES`. It is named after
/// the vector type (`vec_ty.rust()`) and bound to the result of
/// `aligned_wrapper_ty`, which receives `arch_ty` and `max_block_size`.
pub(crate) fn impl_arch_types(
    level_name: &str,
    max_block_size: usize,
    arch_ty: impl Fn(&VecType) -> Ident,
) -> TokenStream {
    let items: Vec<TokenStream> = SIMD_TYPES
        .iter()
        .map(|simd_ty| {
            let alias = simd_ty.rust();
            let storage = simd_ty.aligned_wrapper_ty(&arch_ty, max_block_size);
            quote! {
                type #alias = #storage;
            }
        })
        .collect();
    let level = Ident::new(level_name, Span::call_site());

    quote! {
        impl ArchTypes for #level {
            #( #items )*
        }
    }
}
Loading