Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 36fd3f5

Browse files
committedJan 12, 2025
x86: make SSE2 required for i686 targets and use it to pass SIMD types
1 parent 13f3924 commit 36fd3f5

16 files changed

+245
-84
lines changed
 

‎compiler/rustc_target/src/callconv/mod.rs

Lines changed: 66 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::abi::{
1010
TyAndLayout,
1111
};
1212
use crate::spec::abi::Abi as SpecAbi;
13-
use crate::spec::{self, HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
13+
use crate::spec::{self, HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustAbi, WasmCAbi};
1414

1515
mod aarch64;
1616
mod amdgpu;
@@ -736,14 +736,30 @@ impl<'a, Ty> FnAbi<'a, Ty> {
736736
C: HasDataLayout + HasTargetSpec,
737737
{
738738
let spec = cx.target_spec();
739-
match &spec.arch[..] {
739+
match &*spec.arch {
740740
"x86" => x86::compute_rust_abi_info(cx, self, abi),
741741
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
742742
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
743743
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
744744
_ => {}
745745
};
746746

747+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
748+
// May only return `true` if the target will always pass those arguments the same way,
749+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
750+
// target features are required to pass a SIMD value in registers must be listed in
751+
// the `abi_required_features` for the current target and ABI.
752+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
753+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
754+
// to 128-bit-sized vectors.
755+
"x86" if spec.rust_abi == Some(RustAbi::X86Sse2) => arg.layout.size.bits() <= 128,
756+
"x86_64" if spec.rust_abi != Some(RustAbi::X86Softfloat) => {
757+
arg.layout.size.bits() <= 128
758+
}
759+
// So far, we haven't implemented this logic for any other target.
760+
_ => false,
761+
};
762+
747763
for (arg_idx, arg) in self
748764
.args
749765
.iter_mut()
@@ -755,7 +771,10 @@ impl<'a, Ty> FnAbi<'a, Ty> {
755771
continue;
756772
}
757773

758-
if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
774+
if arg_idx.is_none()
775+
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
776+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
777+
{
759778
// Return values larger than 2 registers using a return area
760779
// pointer. LLVM and Cranelift disagree about how to return
761780
// values that don't fit in the registers designated for return
@@ -794,53 +813,57 @@ impl<'a, Ty> FnAbi<'a, Ty> {
794813
// rustc_target already ensure any return value which doesn't
795814
// fit in the available amount of return registers is passed in
796815
// the right way for the current target.
816+
// The adjustment is also neither necessary nor desired for types with
817+
// a vector representation; those are handled below.
797818
arg.make_indirect();
798819
continue;
799820
}
800821

801822
match arg.layout.backend_repr {
802-
BackendRepr::Memory { .. } => {}
803-
804-
// This is a fun case! The gist of what this is doing is
805-
// that we want callers and callees to always agree on the
806-
// ABI of how they pass SIMD arguments. If we were to *not*
807-
// make these arguments indirect then they'd be immediates
808-
// in LLVM, which means that they'd used whatever the
809-
// appropriate ABI is for the callee and the caller. That
810-
// means, for example, if the caller doesn't have AVX
811-
// enabled but the callee does, then passing an AVX argument
812-
// across this boundary would cause corrupt data to show up.
813-
//
814-
// This problem is fixed by unconditionally passing SIMD
815-
// arguments through memory between callers and callees
816-
// which should get them all to agree on ABI regardless of
817-
// target feature sets. Some more information about this
818-
// issue can be found in #44367.
819-
//
820-
// Note that the intrinsic ABI is exempt here as
821-
// that's how we connect up to LLVM and it's unstable
822-
// anyway, we control all calls to it in libstd.
823-
BackendRepr::Vector { .. }
824-
if abi != SpecAbi::RustIntrinsic && spec.simd_types_indirect =>
825-
{
826-
arg.make_indirect();
827-
continue;
823+
BackendRepr::Memory { .. } => {
824+
// Compute `Aggregate` ABI.
825+
826+
let is_indirect_not_on_stack =
827+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
828+
assert!(is_indirect_not_on_stack);
829+
830+
let size = arg.layout.size;
831+
if arg.layout.is_sized() && size <= Pointer(AddressSpace::DATA).size(cx) {
832+
// We want to pass small aggregates as immediates, but using
833+
// an LLVM aggregate type for this leads to bad optimizations,
834+
// so we pick an appropriately sized integer type instead.
835+
arg.cast_to(Reg { kind: RegKind::Integer, size });
836+
}
828837
}
829838

830-
_ => continue,
831-
}
832-
// Compute `Aggregate` ABI.
833-
834-
let is_indirect_not_on_stack =
835-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
836-
assert!(is_indirect_not_on_stack);
837-
838-
let size = arg.layout.size;
839-
if !arg.layout.is_unsized() && size <= Pointer(AddressSpace::DATA).size(cx) {
840-
// We want to pass small aggregates as immediates, but using
841-
// an LLVM aggregate type for this leads to bad optimizations,
842-
// so we pick an appropriately sized integer type instead.
843-
arg.cast_to(Reg { kind: RegKind::Integer, size });
839+
BackendRepr::Vector { .. } => {
840+
// This is a fun case! The gist of what this is doing is
841+
// that we want callers and callees to always agree on the
842+
// ABI of how they pass SIMD arguments. If we were to *not*
843+
// make these arguments indirect then they'd be immediates
844+
// in LLVM, which means that they'd use whatever the
845+
// appropriate ABI is for the callee and the caller. That
846+
// means, for example, if the caller doesn't have AVX
847+
// enabled but the callee does, then passing an AVX argument
848+
// across this boundary would cause corrupt data to show up.
849+
//
850+
// This problem is fixed by unconditionally passing SIMD
851+
// arguments through memory between callers and callees
852+
// which should get them all to agree on ABI regardless of
853+
// target feature sets. Some more information about this
854+
// issue can be found in #44367.
855+
//
856+
// Note that the intrinsic ABI is exempt here as those are not
857+
// real functions anyway, and LLVM expects certain types.
858+
if abi != SpecAbi::RustIntrinsic
859+
&& spec.simd_types_indirect
860+
&& !can_pass_simd_directly(arg)
861+
{
862+
arg.make_indirect();
863+
}
864+
}
865+
866+
_ => {}
844867
}
845868
}
846869
}

‎compiler/rustc_target/src/spec/json.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,19 @@ impl Target {
128128
Some(Ok(()))
129129
})).unwrap_or(Ok(()))
130130
} );
131+
($key_name:ident, RustAbi) => ( {
132+
let name = (stringify!($key_name)).replace("_", "-");
133+
obj.remove(&name).and_then(|o| o.as_str().and_then(|s| {
134+
match s.parse::<super::RustAbi>() {
135+
Ok(rust_abi) => base.$key_name = Some(rust_abi),
136+
_ => return Some(Err(format!(
137+
"'{s}' is not a valid value for rust-abi. \
138+
Use 'x86-softfloat' or 'x86-sse2'."
139+
))),
140+
}
141+
Some(Ok(()))
142+
})).unwrap_or(Ok(()))
143+
} );
131144
($key_name:ident, RelocModel) => ( {
132145
let name = (stringify!($key_name)).replace("_", "-");
133146
obj.remove(&name).and_then(|o| o.as_str().and_then(|s| {
@@ -611,6 +624,7 @@ impl Target {
611624
key!(llvm_mcount_intrinsic, optional);
612625
key!(llvm_abiname);
613626
key!(llvm_floatabi, FloatAbi)?;
627+
key!(rust_abi, RustAbi)?;
614628
key!(relax_elf_relocations, bool);
615629
key!(llvm_args, list);
616630
key!(use_ctors_section, bool);
@@ -786,6 +800,7 @@ impl ToJson for Target {
786800
target_option_val!(llvm_mcount_intrinsic);
787801
target_option_val!(llvm_abiname);
788802
target_option_val!(llvm_floatabi);
803+
target_option_val!(rust_abi);
789804
target_option_val!(relax_elf_relocations);
790805
target_option_val!(llvm_args);
791806
target_option_val!(use_ctors_section);

‎compiler/rustc_target/src/spec/mod.rs

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,37 @@ impl ToJson for FloatAbi {
11141114
}
11151115
}
11161116

1117+
/// The Rust-specific variant of the ABI used for this target.
1118+
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
1119+
pub enum RustAbi {
1120+
/// On x86-32 only: make use of SSE and SSE2 for ABI purposes.
1121+
X86Sse2,
1122+
/// On x86-32/64 only: do not use any FPU or SIMD registers for the ABI.
1123+
X86Softfloat,
1124+
}
1125+
1126+
impl FromStr for RustAbi {
1127+
type Err = ();
1128+
1129+
fn from_str(s: &str) -> Result<RustAbi, ()> {
1130+
Ok(match s {
1131+
"x86-sse2" => RustAbi::X86Sse2,
1132+
"x86-softfloat" => RustAbi::X86Softfloat,
1133+
_ => return Err(()),
1134+
})
1135+
}
1136+
}
1137+
1138+
impl ToJson for RustAbi {
1139+
fn to_json(&self) -> Json {
1140+
match *self {
1141+
RustAbi::X86Sse2 => "x86-sse2",
1142+
RustAbi::X86Softfloat => "x86-softfloat",
1143+
}
1144+
.to_json()
1145+
}
1146+
}
1147+
11171148
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
11181149
pub enum TlsModel {
11191150
GeneralDynamic,
@@ -2493,6 +2524,12 @@ pub struct TargetOptions {
24932524
/// If not provided, LLVM will infer the float ABI from the target triple (`llvm_target`).
24942525
pub llvm_floatabi: Option<FloatAbi>,
24952526

2527+
/// Picks a specific ABI for this target. This is *not* just for "Rust" ABI functions,
2528+
/// it can also affect "C" ABI functions; the point is that this flag is interpreted by
2529+
/// rustc and not forwarded to LLVM.
2530+
/// So far, this is only used on x86.
2531+
pub rust_abi: Option<RustAbi>,
2532+
24962533
/// Whether or not RelaxElfRelocation flag will be passed to the linker
24972534
pub relax_elf_relocations: bool,
24982535

@@ -2652,10 +2689,6 @@ impl TargetOptions {
26522689
.collect();
26532690
}
26542691
}
2655-
2656-
pub(crate) fn has_feature(&self, search_feature: &str) -> bool {
2657-
self.features.split(',').any(|f| f.strip_prefix('+').is_some_and(|f| f == search_feature))
2658-
}
26592692
}
26602693

26612694
impl Default for TargetOptions {
@@ -2761,6 +2794,7 @@ impl Default for TargetOptions {
27612794
llvm_mcount_intrinsic: None,
27622795
llvm_abiname: "".into(),
27632796
llvm_floatabi: None,
2797+
rust_abi: None,
27642798
relax_elf_relocations: false,
27652799
llvm_args: cvs![],
27662800
use_ctors_section: false,
@@ -3221,6 +3255,22 @@ impl Target {
32213255
_ => {}
32223256
}
32233257

3258+
// Check consistency of Rust ABI declaration.
3259+
if let Some(rust_abi) = self.rust_abi {
3260+
match rust_abi {
3261+
RustAbi::X86Sse2 => check_matches!(
3262+
&*self.arch,
3263+
"x86",
3264+
"`x86-sse2` ABI is only valid for x86-32 targets"
3265+
),
3266+
RustAbi::X86Softfloat => check_matches!(
3267+
&*self.arch,
3268+
"x86" | "x86_64",
3269+
"`x86-softfloat` ABI is only valid for x86 targets"
3270+
),
3271+
}
3272+
}
3273+
32243274
// Check that the given target-features string makes some basic sense.
32253275
if !self.features.is_empty() {
32263276
let mut features_enabled = FxHashSet::default();

‎compiler/rustc_target/src/spec/targets/i586_unknown_linux_gnu.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_gnu::target();
5+
base.rust_abi = None;
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-gnu".into();
78
base

‎compiler/rustc_target/src/spec/targets/i586_unknown_linux_musl.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_musl::target();
5+
base.rust_abi = None;
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-musl".into();
78
// FIXME(compiler-team#422): musl targets should be dynamically linked by default.

‎compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
use crate::spec::{Cc, LinkerFlavor, Lld, SanitizerSet, StackProbeType, Target, base};
1+
use crate::spec::{Cc, LinkerFlavor, Lld, RustAbi, SanitizerSet, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::linux_gnu::opts();
5+
base.rust_abi = Some(RustAbi::X86Sse2);
6+
// Dear distribution packager, if you are changing the base CPU model with the goal of removing
7+
// the SSE2 requirement, make sure to also set the `rust_abi` to `None` above or else SSE2 will
8+
// still be effectively required.
9+
// Also note that x86 without SSE2 is *not* considered a Tier 1 target by the Rust project.
510
base.cpu = "pentium4".into();
611
base.max_atomic_width = Some(64);
712
base.supported_sanitizers = SanitizerSet::ADDRESS;

‎compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, StackProbeType, Target, base};
1+
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, RustAbi, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::linux_musl::opts();
5+
base.rust_abi = Some(RustAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-Wl,-melf_i386"]);

‎compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// The cdecl ABI is used. It differs from the stdcall or fastcall ABI.
66
// "i686-unknown-windows" is used to get the minimal subset of windows-specific features.
77

8-
use crate::spec::{Target, base};
8+
use crate::spec::{RustAbi, Target, base};
99

1010
pub(crate) fn target() -> Target {
1111
let mut base = base::uefi_msvc::opts();
@@ -22,6 +22,7 @@ pub(crate) fn target() -> Target {
2222
// If you initialize FP units yourself, you can override these flags with custom linker
2323
// arguments, thus giving you access to full MMX/SSE acceleration.
2424
base.features = "-mmx,-sse,+soft-float".into();
25+
base.rust_abi = Some(RustAbi::X86Softfloat);
2526

2627
// Use -GNU here, because of the reason below:
2728
// Background and Problem:

‎compiler/rustc_target/src/spec/targets/x86_64_unknown_none.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// features.
66

77
use crate::spec::{
8-
Cc, CodeModel, LinkerFlavor, Lld, PanicStrategy, RelroLevel, SanitizerSet, StackProbeType,
9-
Target, TargetOptions,
8+
Cc, CodeModel, LinkerFlavor, Lld, PanicStrategy, RelroLevel, RustAbi, SanitizerSet,
9+
StackProbeType, Target, TargetOptions,
1010
};
1111

1212
pub(crate) fn target() -> Target {
@@ -20,6 +20,7 @@ pub(crate) fn target() -> Target {
2020
relro_level: RelroLevel::Full,
2121
linker_flavor: LinkerFlavor::Gnu(Cc::No, Lld::Yes),
2222
linker: Some("rust-lld".into()),
23+
rust_abi: Some(RustAbi::X86Softfloat),
2324
features: "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float".into(),
2425
supported_sanitizers: SanitizerSet::KCFI | SanitizerSet::KERNELADDRESS,
2526
disable_redzone: true,

‎compiler/rustc_target/src/spec/targets/x86_64_unknown_uefi.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// LLVM. "x86_64-unknown-windows" is used to get the minimal subset of windows-specific features.
77

88
use crate::abi::call::Conv;
9-
use crate::spec::{Target, base};
9+
use crate::spec::{RustAbi, Target, base};
1010

1111
pub(crate) fn target() -> Target {
1212
let mut base = base::uefi_msvc::opts();
@@ -26,6 +26,7 @@ pub(crate) fn target() -> Target {
2626
// If you initialize FP units yourself, you can override these flags with custom linker
2727
// arguments, thus giving you access to full MMX/SSE acceleration.
2828
base.features = "-mmx,-sse,+soft-float".into();
29+
base.rust_abi = Some(RustAbi::X86Softfloat);
2930

3031
Target {
3132
llvm_target: "x86_64-unknown-windows".into(),

‎compiler/rustc_target/src/target_features.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet};
55
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
66
use rustc_span::{Symbol, sym};
77

8-
use crate::spec::{FloatAbi, Target};
8+
use crate::spec::{FloatAbi, RustAbi, Target};
99

1010
/// Features that control behaviour of rustc, rather than the codegen.
1111
/// These exist globally and are not in the target-specific lists below.
@@ -770,23 +770,47 @@ impl Target {
770770
// questions "which ABI is used".
771771
match &*self.arch {
772772
"x86" => {
773-
// We support 2 ABIs, hardfloat (default) and softfloat.
774-
// x86 has no sane ABI indicator so we have to use the target feature.
775-
if self.has_feature("soft-float") {
776-
NOTHING
777-
} else {
778-
// Hardfloat ABI. x87 must be enabled.
779-
FeatureConstraints { required: &["x87"], incompatible: &[] }
773+
// We use our own ABI indicator here; LLVM does not have anything native.
774+
match self.rust_abi {
775+
None => {
776+
// Default hardfloat ABI.
777+
// x87 must be enabled, soft-float must be disabled.
778+
FeatureConstraints { required: &["x87"], incompatible: &["soft-float"] }
779+
}
780+
Some(RustAbi::X86Sse2) => {
781+
// Extended hardfloat ABI. x87 and SSE2 must be enabled, soft-float must be disabled.
782+
FeatureConstraints {
783+
required: &["x87", "sse2"],
784+
incompatible: &["soft-float"],
785+
}
786+
}
787+
Some(RustAbi::X86Softfloat) => {
788+
// Softfloat ABI, requires corresponding target feature. That feature trumps
789+
// `x87` and all other FPU features so those do not matter.
790+
// Note that this one requirement is the entire implementation of the ABI!
791+
// LLVM handles the rest.
792+
FeatureConstraints { required: &["soft-float"], incompatible: &[] }
793+
}
780794
}
781795
}
782796
"x86_64" => {
783-
// We support 2 ABIs, hardfloat (default) and softfloat.
784-
// x86 has no sane ABI indicator so we have to use the target feature.
785-
if self.has_feature("soft-float") {
786-
NOTHING
787-
} else {
788-
// Hardfloat ABI. x87 and SSE2 must be enabled.
789-
FeatureConstraints { required: &["x87", "sse2"], incompatible: &[] }
797+
// We use our own ABI indicator here; LLVM does not have anything native.
798+
match self.rust_abi {
799+
None => {
800+
// Default hardfloat ABI. On x86-64, this always includes SSE2.
801+
FeatureConstraints {
802+
required: &["x87", "sse2"],
803+
incompatible: &["soft-float"],
804+
}
805+
}
806+
Some(RustAbi::X86Softfloat) => {
807+
// Softfloat ABI, requires corresponding target feature. That feature trumps
808+
// `x87` and all other FPU features so those do not matter.
809+
// Note that this one requirement is the entire implementation of the ABI!
810+
// LLVM handles the rest.
811+
FeatureConstraints { required: &["soft-float"], incompatible: &[] }
812+
}
813+
Some(r) => panic!("invalid Rust ABI for x86_64: {r:?}"),
790814
}
791815
}
792816
"arm" => {

‎tests/codegen/abi-x86-sse.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//@ compile-flags: -Z merge-functions=disabled
2+
3+
//@ revisions: x86-64
4+
//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu
5+
//@[x86-64] needs-llvm-components: x86
6+
7+
//@ revisions: x86-32
8+
//@[x86-32] compile-flags: --target i686-unknown-linux-gnu
9+
//@[x86-32] needs-llvm-components: x86
10+
11+
//@ revisions: x86-32-nosse
12+
//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu
13+
//@[x86-32-nosse] needs-llvm-components: x86
14+
15+
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
16+
#![no_core]
17+
#![crate_type = "lib"]
18+
19+
#[lang = "sized"]
20+
trait Sized {}
21+
22+
#[lang = "copy"]
23+
trait Copy {}
24+
25+
// Ensure this type is passed without ptr indirection on targets that
26+
// require SSE2.
27+
#[repr(simd)]
28+
pub struct Sse([f32; 4]);
29+
30+
// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
31+
// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
32+
// x86-32-nosse: void @sse_id(ptr {{[^,]*}} sret{{[^,]*}}, ptr {{[^,]*}})
33+
#[no_mangle]
34+
pub fn sse_id(x: Sse) -> Sse {
35+
x
36+
}

‎tests/codegen/intrinsics/transmute-x64.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ use std::mem::transmute;
99
// CHECK-LABEL: @check_sse_float_to_int(
1010
#[no_mangle]
1111
pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i {
12-
// CHECK-NOT: alloca
13-
// CHECK: %0 = load <4 x float>, ptr %x, align 16
14-
// CHECK: store <4 x float> %0, ptr %_0, align 16
12+
// FIXME: the MIR opt still works, but the ABI logic now introduces
13+
// an alloca here.
14+
// CHECK: alloca
15+
// CHECK: store <4 x float> %x, ptr %_0, align 16
1516
transmute(x)
1617
}
1718

‎tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
3838
#[no_mangle]
3939
pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
4040
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
41-
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
41+
// CHECK: ret <4 x float> %[[VAL:.+]]
4242
unsafe { std::mem::transmute(x) }
4343
}
4444

@@ -53,6 +53,6 @@ pub fn build_array_t(x: [f32; 4]) -> T {
5353
#[no_mangle]
5454
pub fn build_array_transmute_t(x: [f32; 4]) -> T {
5555
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
56-
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
56+
// CHECK: ret <4 x float> %[[VAL:.+]]
5757
unsafe { std::mem::transmute(x) }
5858
}

‎tests/codegen/simd/packed-simd.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//@ revisions:opt3 noopt
2+
//@ only-x86_64
23
//@[opt3] compile-flags: -Copt-level=3
34
//@[noopt] compile-flags: -Cno-prepopulate-passes
45

@@ -14,41 +15,41 @@ use core::{mem, ptr};
1415

1516
#[repr(simd, packed)]
1617
#[derive(Copy, Clone)]
17-
pub struct Simd<T, const N: usize>([T; N]);
18+
pub struct PackedSimd<T, const N: usize>([T; N]);
1819

1920
#[repr(simd)]
2021
#[derive(Copy, Clone)]
2122
pub struct FullSimd<T, const N: usize>([T; N]);
2223

2324
// non-powers-of-two have padding and need to be expanded to full vectors
24-
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
25+
fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
2526
unsafe {
2627
let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
2728
ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
2829
tmp.assume_init()
2930
}
3031
}
3132

32-
// CHECK-LABEL: square_packed_full
33-
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
33+
// CHECK-LABEL: define <3 x float> @square_packed_full
3434
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
3535
#[no_mangle]
36-
pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
36+
pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
3737
// CHECK-NEXT: start
38-
// noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
39-
// CHECK: load <3 x float>
38+
// noopt: alloca
39+
// opt3-NOT: alloca
40+
// opt3: load <3 x float>
4041
let x = load(x);
4142
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
42-
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
43-
// CHECK-NEXT: ret void
43+
// opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
44+
// noopt: ret <3 x float> [[VREG:%[a-z0-9_]+]]
4445
unsafe { intrinsics::simd_mul(x, x) }
4546
}
4647

4748
// CHECK-LABEL: square_packed
4849
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
4950
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
5051
#[no_mangle]
51-
pub fn square_packed(x: Simd<f32, 3>) -> Simd<f32, 3> {
52+
pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> {
5253
// CHECK-NEXT: start
5354
// CHECK-NEXT: load <3 x float>
5455
// noopt-NEXT: load <3 x float>

‎tests/ui/sse-abi-checks.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Ensure we trigger abi_unsupported_vector_types for target features that are usually enabled
22
//! on a target, but disabled in this file via a `-C` flag.
3-
//@ compile-flags: --crate-type=rlib --target=i686-unknown-linux-gnu -C target-feature=-sse,-sse2
3+
//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu -C target-feature=-sse,-sse2
44
//@ build-pass
55
//@ ignore-pass (test emits codegen-time warnings)
66
//@ needs-llvm-components: x86

0 commit comments

Comments
 (0)
Please sign in to comment.