diff --git a/Cargo.lock b/Cargo.lock index 1daa8ddcbc245..5516d29b73453 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -306,6 +306,7 @@ dependencies = [ name = "bsan-rt" version = "0.1.0" dependencies = [ + "bsan-shared", "cbindgen", "hashbrown 0.15.2", "libc", @@ -313,6 +314,10 @@ dependencies = [ "smallvec", ] +[[package]] +name = "bsan-shared" +version = "0.1.0" + [[package]] name = "bstr" version = "1.10.0" @@ -3546,6 +3551,7 @@ dependencies = [ "ar_archive_writer", "arrayvec", "bitflags 2.6.0", + "bsan-shared", "cc", "either", "itertools", diff --git a/Cargo.toml b/Cargo.toml index 60e3b5578e22e..e54b3ec9e1086 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "src/tools/rustfmt", "src/tools/miri", "src/tools/miri/cargo-miri", + "src/tools/bsan/bsan-shared/", "src/tools/bsan/bsan-driver/", "src/tools/bsan/bsan-driver/cargo-bsan", "src/tools/bsan/bsan-rt/", @@ -49,7 +50,7 @@ members = [ "src/tools/opt-dist", "src/tools/coverage-dump", "src/tools/rustc-perf-wrapper", - "src/tools/wasm-component-ld", + "src/tools/wasm-component-ld", "src/tools/bsan/bsan-shared", ] exclude = [ diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 481d711102713..df3cc3b93bcd4 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -5,15 +5,14 @@ use std::{iter, ptr}; use libc::{c_char, c_uint}; use rustc_abi as abi; use rustc_abi::{Align, Size, WrappingRange}; -use rustc_codegen_ssa::MemFlags; use rustc_codegen_ssa::common::{IntPredicate, RealPredicate, SynchronizationScope, TypeKind}; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue}; use rustc_codegen_ssa::traits::*; +use rustc_codegen_ssa::{MemFlags, RetagInfo}; use rustc_data_structures::small_c_str::SmallCStr; use rustc_hir::def_id::DefId; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; -use rustc_middle::mir::{PlaceKind, 
RetagKind}; use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers, TyAndLayout, @@ -1164,16 +1163,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size); } - fn retag( - &mut self, - place: PlaceValue<&'ll Value>, - place_kind: PlaceKind, - retag_kind: RetagKind, - ) { + fn retag(&mut self, place: PlaceValue, info: RetagInfo) { self.call_intrinsic("llvm.bsan.retag", &[ place.llval, - self.cx.const_i8(place_kind as i8), - self.cx.const_i8(retag_kind as i8), + self.const_usize(info.size as u64), + self.const_u8(info.perm_kind.into()), + self.const_u8(info.protector_kind as u8), ]); } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 5319cc052bd59..9fb51f935d4f5 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -1096,7 +1096,7 @@ impl<'ll> CodegenCx<'ll, '_> { ifn!("llvm.ptrmask", fn(ptr, t_isize) -> ptr); - ifn!("llvm.bsan.retag", fn(ptr, t_i8, t_i8) -> void); + ifn!("llvm.bsan.retag", fn(ptr, t_isize, t_i8, t_i8) -> void); None } diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index b898cfec79669..0ef69ce2dd272 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ -8,6 +8,7 @@ edition = "2021" ar_archive_writer = "0.4.2" arrayvec = { version = "0.7", default-features = false } bitflags = "2.4.1" +bsan-shared = { path = "../../src/tools/bsan/bsan-shared" } cc = "1.1.23" either = "1.5.0" itertools = "0.12" diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 7dc8ab38a9764..c4f3c2501c4a6 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -58,6 +58,8 @@ pub mod size_of_val; pub mod target_features; pub mod traits; +pub use 
bsan_shared::RetagInfo; + rustc_fluent_macro::fluent_messages! { "../messages.ftl" } pub struct ModuleCodegen { diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index 0cbc5c45736e8..bd38a94875510 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -22,6 +22,7 @@ mod intrinsic; mod locals; pub mod operand; pub mod place; +mod retag; mod rvalue; mod statement; diff --git a/compiler/rustc_codegen_ssa/src/mir/retag.rs b/compiler/rustc_codegen_ssa/src/mir/retag.rs new file mode 100644 index 0000000000000..889b7f0e80db4 --- /dev/null +++ b/compiler/rustc_codegen_ssa/src/mir/retag.rs @@ -0,0 +1,381 @@ +use std::marker::PhantomData; + +use bsan_shared::{Permission, ProtectorKind, RetagInfo}; +use rustc_abi::{BackendRepr, FieldIdx, FieldsShape, VariantIdx, Variants}; +use rustc_middle::mir::{Place, RetagKind}; +use rustc_middle::ty::layout::{HasTyCtxt, TyAndLayout}; +use rustc_middle::ty::{self, Mutability}; +use rustc_session::config::BsanRetagFields; +use tracing::trace; + +use super::operand::OperandValue; +use super::place::PlaceValue; +use super::{BuilderMethods, FunctionCx, LocalRef}; +use crate::common::IntPredicate; +use crate::mir::place::PlaceRef; +use crate::traits::{ConstCodegenMethods, LayoutTypeCodegenMethods, MiscCodegenMethods}; + +// When we retag a Place, we need to traverse through all of its fields +// and/or variants and emit retags for all of the sub-places that contain references, +// Boxes, and other types that require retagging. Calculating a sub-place requires cg-ing pointer offsets +// from the initial place and branching on variants. Not all sub-places need to be retagged, so we cannot +// compute them eagerly. Instead, when traversing a place, we store unevaluated subplaces as "modifiers" +// from an initial place. Once we find a subplace that needs to be retagged, we apply all current modifiers +// to the "base" place that we started with. 
We store the intermediate results from calculating all subplaces +// along the "path" to the subplace we're visiting, so that when we traverse back up the path, we don't need to +// repeat work. For example, if a variant of an enum contains N sub-places that need retagging, +// then we only want to have to branch that variant once, instead of N times for each sub-place. + +/// Either a variant or a field. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Modifier { + Variant(VariantIdx), + Field(FieldIdx), +} + +impl Modifier { + fn apply_to<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( + self, + bx: &mut Bx, + place: &PlaceRef<'tcx, Bx::Value>, + ) -> (PlaceRef<'tcx, Bx::Value>, Option<(Bx::BasicBlock, Bx::BasicBlock)>) { + match self { + Modifier::Variant(idx) => { + let cx = bx.cx(); + let discrminant_ty = place.layout.ty.discriminant_ty(cx.tcx()); + let discrminant_for_variant = place + .layout + .ty + .discriminant_for_variant(cx.tcx(), idx) + .expect("Invalid variant."); + + let discriminant_backend_ty = + bx.immediate_backend_type(bx.layout_of(discrminant_ty)); + let discriminant_for_variant = + bx.const_uint_big(discriminant_backend_ty, discrminant_for_variant.val); + + let discriminant_actual = place.codegen_get_discr(bx, discrminant_ty); + + let is_variant = bx.append_sibling_block("variant"); + + let is_not_variant = bx.append_sibling_block("cont"); + + let cond = + bx.icmp(IntPredicate::IntEQ, discriminant_for_variant, discriminant_actual); + + bx.cond_br(cond, is_variant, is_not_variant); + + bx.switch_to_block(is_variant); + (place.project_downcast(bx, idx), Some((is_variant, is_not_variant))) + } + Modifier::Field(field_idx) => (place.project_field(bx, field_idx.as_usize()), None), + } + } +} +struct RetagCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> { + kind: RetagKind, + places: Vec>, + modifiers: Vec, + branches: Vec, + data: PhantomData<&'a ()>, +} + +impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> RetagCx<'a, 'tcx, Bx> { + fn visit(bx: &mut Bx, base: 
PlaceRef<'tcx, Bx::Value>, kind: RetagKind) { + let mut visitor = Self { + kind, + places: vec![base], + modifiers: vec![], + branches: vec![], + data: PhantomData::default(), + }; + visitor.visit_value(bx, base.layout); + } + + /// Applies each of the current modifiers to the base PlaceRef, cg-ing along the way. + #[allow(dead_code)] + fn crystallize(&mut self, bx: &mut Bx) -> PlaceRef<'tcx, Bx::Value> { + let mut curr_subplace = *self.places.last().unwrap(); + + let modifiers: Vec = self.modifiers.drain(..).collect(); + + for modifier in modifiers { + let (subplace, branch) = modifier.apply_to(bx, &curr_subplace); + if let Some((then, otherwise)) = branch { + bx.switch_to_block(then); + self.branches.push(otherwise) + } + curr_subplace = subplace; + self.places.push(curr_subplace); + } + + return curr_subplace; + } + + // Recursive actions, ready to be overloaded. + /// Visits the given value, dispatching as appropriate to more specialized visitors. + #[inline(always)] + fn visit_value(&mut self, bx: &mut Bx, layout: TyAndLayout<'tcx>) { + // If this place is smaller than a pointer, we know that it can't contain any + // pointers we need to retag, so we can stop recursion early. + // This optimization is crucial for ZSTs, because they can contain way more fields + // than we can ever visit. + if layout.is_sized() && layout.size < bx.tcx().data_layout.pointer_size { + return; + } + + // Check the type of this value to see what to do with it (retag, or recurse). + match layout.ty.kind() { + // If it is a trait object, switch to the real type that was used to create it. + ty::Dynamic(_data, _, ty::Dyn) => {} + ty::Dynamic(_data, _, ty::DynStar) => {} + &ty::Ref(_, _, mutability) => { + let place = self.crystallize(bx); + self.retag_ref_ty(bx, place, mutability); + } + + ty::RawPtr(_, _) => { + // We definitely do *not* want to recurse into raw pointers -- wide raw + // pointers have fields, and for dyn Trait pointees those can have reference + // type! 
+ // We also do not want to reborrow them. + } + ty::Adt(adt, _) if adt.is_box() => { + // Recurse for boxes, they require some tricky handling and will end up in `visit_box` above. + // (Yes this means we technically also recursively retag the allocator itself + // even if field retagging is not enabled. *shrug*) + self.walk_value(bx, layout); + } + _ => { + // Not a reference/pointer/box. Only recurse if configured appropriately. + let recurse = match bx.cx().sess().retag_fields() { + BsanRetagFields::None => false, + BsanRetagFields::All => true, + BsanRetagFields::Scalar => { + // Matching `ArgAbi::new` at the time of writing, only fields of + // `Scalar` and `ScalarPair` ABI are considered. + matches!( + layout.backend_repr, + BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..) + ) + } + }; + if recurse { + self.walk_value(bx, layout) + } + } + } + } + + /// Called each time we recurse down to a field of a "product-like" aggregate + /// (structs, tuples, arrays and the like, but not enums), passing in old (outer) + /// and new (inner) value. + /// This gives the visitor the chance to track the stack of nested fields that + /// we are descending through. + #[inline(always)] + fn visit_field(&mut self, bx: &mut Bx, layout: TyAndLayout<'tcx>, idx: FieldIdx) { + self.modifiers.push(Modifier::Field(idx)); + self.visit_value(bx, layout.field(bx.cx(), idx.as_usize())); + if self.modifiers.is_empty() { + self.places.pop().expect("A place should have been evaluated."); + } else { + self.modifiers.pop().expect("An unevaluated modifier should be present."); + } + } + /// Called when recursing into an enum variant. + /// This gives the visitor the chance to track the stack of nested fields that + /// we are descending through. 
+ #[inline(always)] + fn visit_variant(&mut self, bx: &mut Bx, layout: TyAndLayout<'tcx>, vidx: VariantIdx) { + self.modifiers.push(Modifier::Variant(vidx)); + self.visit_value(bx, layout.for_variant(bx.cx(), vidx)); + if self.modifiers.is_empty() { + self.places.pop().expect("A place should have been resolved."); + let otherwise = self.branches.pop().expect("A conditional should have been inserted."); + bx.br(otherwise); + bx.switch_to_block(otherwise); + } else { + self.modifiers.pop(); + } + } + + fn inner_ptr_of_unique( + &mut self, + bx: &mut Bx, + unique_ptr: PlaceRef<'tcx, Bx::Value>, + ) -> PlaceRef<'tcx, Bx::Value> { + // Unfortunately there is some type junk in the way here: `unique_ptr` is a `Unique`... + // (which means another 2 fields, the second of which is a `PhantomData`) + assert_eq!(unique_ptr.layout.fields.count(), 2); + let phantom = unique_ptr.layout.field(bx.cx(), 1); + assert!( + phantom.ty.ty_adt_def().is_some_and(|adt| adt.is_phantom_data()), + "2nd field of `Unique` should be PhantomData but is {:?}", + phantom.ty, + ); + let nonnull_ptr = unique_ptr.project_field(bx, 0); + // ... that contains a `NonNull`... (gladly, only a single field here) + assert_eq!(nonnull_ptr.layout.fields.count(), 1); + // ... whose only field finally is a raw ptr + nonnull_ptr.project_field(bx, 0) + } + + fn retag_ref_ty( + &mut self, + bx: &mut Bx, + pointee: PlaceRef<'tcx, Bx::Value>, + mutability: Mutability, + ) { + let ty_is_freeze = pointee.layout.ty.is_freeze(bx.tcx(), bx.typing_env()); + let ty_is_unpin = pointee.layout.ty.is_unpin(bx.tcx(), bx.typing_env()); + let is_protected = self.kind == RetagKind::FnEntry; + + let perm_kind = match mutability { + Mutability::Not if ty_is_unpin => Permission::new_reserved(ty_is_freeze, is_protected), + Mutability::Mut if ty_is_freeze => Permission::new_frozen(), + // Raw pointers never enter this function so they are not handled. 
+ // However raw pointers are not the only pointers that take the parent + // tag, this also happens for `!Unpin` `&mut`s and interior mutable + // `&`s, which are excluded above. + _ => return, + }; + + let size = pointee.layout.size.bytes_usize(); + + let protector_kind = + if is_protected { ProtectorKind::StrongProtector } else { ProtectorKind::NoProtector }; + let perm = RetagInfo::new(size, perm_kind, protector_kind); + bx.retag(pointee.val, perm); + } + + /// Compute permission for `Box`-like type (`Box` always, and also `Unique` if enabled). + /// These pointers allow deallocation so need a different kind of protector not handled + /// by `from_ref_ty`. + fn retag_unique_ty(&mut self, bx: &mut Bx, place: PlaceRef<'tcx, Bx::Value>) { + let ty = place.layout.ty; + let ty_is_unpin = ty.is_unpin(bx.tcx(), bx.typing_env()); + if ty_is_unpin { + let ty_is_freeze = ty.is_freeze(bx.tcx(), bx.typing_env()); + let is_protected = self.kind == RetagKind::FnEntry; + let size = place.layout.size.bytes_usize(); + let protector_kind: ProtectorKind = if is_protected { + ProtectorKind::WeakProtector + } else { + ProtectorKind::NoProtector + }; + + let perm_kind = Permission::new_reserved(ty_is_freeze, is_protected); + let perm = RetagInfo::new(size, perm_kind, protector_kind); + bx.retag(place.val, perm); + todo!() + } + } + + /// Traversal logic; should not be overloaded. + fn walk_value(&mut self, bx: &mut Bx, layout: TyAndLayout<'tcx>) { + let ty = layout.ty; + + trace!("walk_value: type: {ty}"); + + // Special treatment for special types, where the (static) layout is not sufficient. + match *ty.kind() { + // If it is a trait object, switch to the real type that was used to create it. + // ty placement with length 0, so we enter the `Array` case below which + // indirectly uses the metadata to determine the actual length. + + // However, `Box`... let's talk about `Box`. + ty::Adt(def, ..) 
if def.is_box() => { + // `Box` has two fields: the pointer we care about, and the allocator. + assert_eq!(layout.fields.count(), 2, "`Box` must have exactly 2 fields"); + + if ty.is_box_global(bx.tcx()) { + let current_place = self.crystallize(bx); + let unique_ptr = current_place.project_field(bx, 0); + let inner_ptr = self.inner_ptr_of_unique(bx, unique_ptr); + self.retag_unique_ty(bx, inner_ptr); + } + + // The second `Box` field is the allocator, which we recursively check for validity + // like in regular structs. + self.visit_field(bx, layout, FieldIdx::from_usize(1)); + } + + // Non-normalized types should never show up here. + ty::Param(..) + | ty::Alias(..) + | ty::Bound(..) + | ty::Placeholder(..) + | ty::Infer(..) + | ty::Error(..) => {} + + // The rest is handled below. + _ => {} + }; + + // Visit the fields of this value. + match &layout.fields { + FieldsShape::Primitive => {} + FieldsShape::Arbitrary { memory_index, .. } => { + for idx in memory_index.indices() { + self.visit_field(bx, layout, idx); + } + } + FieldsShape::Array { .. } => { + for idx in layout.fields.index_by_increasing_offset() { + self.visit_field(bx, layout, FieldIdx::from_usize(idx)); + } + } + _ => {} + } + + match &layout.variants { + Variants::Multiple { tag_field, variants, .. } => { + self.modifiers.push(Modifier::Field(FieldIdx::from_usize(*tag_field))); + for vidx in variants.indices().into_iter() { + self.visit_variant(bx, layout, vidx); + } + } + Variants::Single { .. 
} => {} + } + } +} + +impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { + pub(crate) fn codegen_retag(&mut self, bx: &mut Bx, place: &Place<'tcx>, kind: RetagKind) { + self.resolve_place(bx, place).map(|place| RetagCx::visit(bx, place, kind)); + } + + fn resolve_place( + &mut self, + bx: &mut Bx, + place: &Place<'tcx>, + ) -> Option> { + if let Some(index) = place.as_local() { + match self.locals[index] { + LocalRef::Place(cg_dest) => Some(cg_dest), + LocalRef::UnsizedPlace(cg_indirect_dest) => Some(cg_indirect_dest), + LocalRef::PendingOperand => None, + LocalRef::Operand(op) => { + let mono_ty = self.monomorphized_place_ty(place.as_ref()); + if mono_ty.is_any_ptr() { + let place_val = match op.val { + OperandValue::Ref(r) => Some(r), + OperandValue::Immediate(llval) => { + Some(PlaceValue::new_sized(llval, op.layout.align.abi)) + } + OperandValue::Pair(llptr, _) => { + Some(PlaceValue::new_sized(llptr, op.layout.align.abi).into()) + } + OperandValue::ZeroSized => None, + }; + place_val.map(|place_val| PlaceRef::new_sized(place_val.llval, op.layout)) + } else { + None + } + } + } + } else { + Some(self.codegen_place(bx, place.as_ref())) + } + } +} diff --git a/compiler/rustc_codegen_ssa/src/mir/statement.rs b/compiler/rustc_codegen_ssa/src/mir/statement.rs index 416d826c25ee4..8e2cb3939014d 100644 --- a/compiler/rustc_codegen_ssa/src/mir/statement.rs +++ b/compiler/rustc_codegen_ssa/src/mir/statement.rs @@ -1,10 +1,8 @@ -use rustc_middle::mir::{self, NonDivergingIntrinsic, PlaceKind}; +use rustc_middle::mir::{self, NonDivergingIntrinsic}; use rustc_middle::span_bug; use tracing::instrument; -use super::operand::OperandValue; use super::{FunctionCx, LocalRef}; -use crate::mir::place::PlaceValue; use crate::traits::*; impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { @@ -89,41 +87,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let src = src_val.immediate(); bx.memcpy(dst, align, src, align, 
bytes, crate::MemFlags::empty()); } - mir::StatementKind::Retag(retag_kind, box ref place) => { + mir::StatementKind::Retag(kind, box ref place) => { if self.cx.sess().emit_retags() { - let place_value = if let Some(index) = place.as_local() { - match self.locals[index] { - LocalRef::Place(cg_dest) => cg_dest.val, - LocalRef::UnsizedPlace(cg_indirect_dest) => cg_indirect_dest.val, - LocalRef::PendingOperand => { - span_bug!( - statement.source_info.span, - "retagging an operand {:?} that has not created yet", - place - ); - } - LocalRef::Operand(op) => { - let mono_ty = self.monomorphized_place_ty(place.as_ref()); - if mono_ty.is_any_ptr() { - match op.val { - OperandValue::Ref(r) => r, - OperandValue::Immediate(llval) => { - PlaceValue::new_sized(llval, op.layout.align.abi) - } - OperandValue::Pair(llptr, _) => { - PlaceValue::new_sized(llptr, op.layout.align.abi) - } - OperandValue::ZeroSized => return, - } - } else { - return; - } - } - } - } else { - self.codegen_place(bx, place.as_ref()).val - }; - bx.retag(place_value, PlaceKind::Default, retag_kind); + self.codegen_retag(bx, place, kind); } } mir::StatementKind::FakeRead(..) 
/// Emits a retag operation for `place`.
///
/// `perm` carries the size, permission kind, and protector kind of the retag.
fn retag(&mut self, place: PlaceValue<Self::Value>, perm: RetagInfo);
CollapseMacroDebuginfo, CoverageLevel, + CoverageOptions, DebugInfo, DumpMonoStatsFormat, ErrorOutputType, ExternEntry, ExternLocation, + Externs, FmtDebug, FunctionReturn, InliningThreshold, Input, InstrumentCoverage, + InstrumentXRay, LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirIncludeSpans, + NextSolverConfig, OomStrategy, Options, OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, + Passes, PatchableFunctionEntry, Polonius, ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, build_configuration, build_session_options, rustc_optgroups, }; @@ -759,6 +759,7 @@ fn test_unstable_options_tracking_hash() { tracked!(assume_incomplete_release, true); tracked!(binary_dep_depinfo, true); tracked!(box_noalias, false); + tracked!(bsan_retag_fields, BsanRetagFields::All); tracked!( branch_protection, Some(BranchProtection { diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index 2c4326a761309..132e7cfafcc97 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -520,15 +520,6 @@ pub enum RetagKind { Default, } -#[repr(C)] -#[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, Hash, HashStable)] -#[rustc_pass_by_value] -pub enum PlaceKind { - Freeze, - Unpin, - Default, -} - /// The `FakeReadCause` describes the type of pattern why a FakeRead statement exists. #[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, Hash, HashStable, PartialEq)] pub enum FakeReadCause { diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 6e35cffada9e9..191ad5f035321 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -42,12 +42,6 @@ mod cfg; mod native_libs; pub mod sigpipe; -#[derive(Clone, Copy, PartialEq, Hash, Debug)] -pub enum AliasingModel { - Stack, - Tree, -} - /// The different settings that the `-C strip` flag can have. 
#[derive(Clone, Copy, PartialEq, Hash, Debug)] pub enum Strip { @@ -2911,7 +2905,7 @@ pub(crate) mod dep_tracking { }; use super::{ - AliasingModel, BranchProtection, CFGuard, CFProtection, CollapseMacroDebuginfo, + BranchProtection, BsanRetagFields, CFGuard, CFProtection, CollapseMacroDebuginfo, CoverageOptions, CrateType, DebugInfo, DebugInfoCompression, ErrorOutputType, FmtDebug, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay, LinkerPluginLto, LocationDetail, LtoCli, NextSolverConfig, OomStrategy, OptLevel, OutFileName, OutputType, @@ -3017,7 +3011,7 @@ pub(crate) mod dep_tracking { InliningThreshold, FunctionReturn, WasmCAbi, - AliasingModel + BsanRetagFields ); impl DepTrackingHash for (T1, T2) @@ -3292,3 +3286,28 @@ impl MirIncludeSpans { self == MirIncludeSpans::On } } +/// Whether retagging recurses into fields. `All` means it always recurses (the default, +/// and equivalent to -Zmiri-retag-fields without an explicit value), `None` means it never +/// recurses, `Scalar` means it only recurses for types where we would also emit noalias annotations +/// in the generated LLVM IR (types passed as individual scalars or pairs of scalars). Setting this +/// to `None`` is unsound. 
#[derive(Clone, Copy, Default, PartialEq, Hash, Debug)]
pub enum BsanRetagFields {
    /// Always recurse into fields (the default).
    #[default]
    All,
    /// Never recurse into fields.
    None,
    /// Recurse only for types passed as individual scalars or pairs of scalars.
    Scalar,
}

impl FromStr for BsanRetagFields {
    type Err = ();

    /// Parses one of the lowercase names `all`, `none`, or `scalar`.
    ///
    /// Any other input, including different capitalization, yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "all" => Ok(Self::All),
            "none" => Ok(Self::None),
            "scalar" => Ok(Self::Scalar),
            _ => Err(()),
        }
    }
}
{ // - src/doc/unstable-book/src/compiler-flags // tidy-alphabetical-start - aliasing_model: AliasingModel = (AliasingModel::Tree, parse_aliasing_model, [TRACKED], - "set the aliasing model ('tree' (default), 'stack')."), allow_features: Option> = (None, parse_opt_comma_list, [TRACKED], "only allow the listed language features to be enabled in code (comma separated)"), always_encode_mir: bool = (false, parse_bool, [TRACKED], @@ -1707,6 +1704,11 @@ options! { (default: no)"), box_noalias: bool = (true, parse_bool, [TRACKED], "emit noalias metadata for box (default: yes)"), + bsan_retag_fields: BsanRetagFields = (BsanRetagFields::default(), parse_bsan_retag_fields, [TRACKED], + "control whether retags are recursively applied to the fields of a place (default: `all`). \ + `all` emits retags for every field. + `none` skips retagging fields. This option is unsound. + `scalar` only retags fields of values with a scalar ABI."), branch_protection: Option = (None, parse_branch_protection, [TRACKED], "set options for branch target identification and pointer authentication on AArch64"), cf_protection: CFProtection = (CFProtection::None, parse_cfprotection, [TRACKED], diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index 88ec2c0dc9997..a5520765c292b 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -40,8 +40,8 @@ use rustc_target::spec::{ use crate::code_stats::CodeStats; pub use crate::code_stats::{DataTypeKind, FieldInfo, FieldKind, SizeKind, VariantInfo}; use crate::config::{ - self, CoverageLevel, CrateType, DebugInfo, ErrorOutputType, FunctionReturn, Input, - InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents, + self, BsanRetagFields, CoverageLevel, CrateType, DebugInfo, ErrorOutputType, FunctionReturn, + Input, InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents, SwitchWithOptPath, }; use crate::filesearch::FileSearch; @@ 
/// Returns the value of the unstable `bsan_retag_fields` option, which controls whether
/// retagging recurses into the fields of a place.
pub fn retag_fields(&self) -> BsanRetagFields {
    self.opts.unstable_opts.bsan_retag_fields
}
/// Creates a new borrow tag for the given provenance object.
///
/// `size`, `perm_kind`, and `protector_kind` are the raw components of a `RetagInfo`.
/// The body currently only decodes them and discards the result; `span` and `prov` are
/// not used yet.
#[no_mangle]
extern "C" fn bsan_retag(
    span: Span,
    prov: *mut Provenance,
    size: usize,
    perm_kind: u8,
    protector_kind: u8,
) {
    // NOTE(review): `from_raw` is an `unsafe fn`; presumably `perm_kind` and
    // `protector_kind` must be valid discriminants of their enums -- confirm that the
    // instrumentation only ever passes such values.
    let _ = unsafe { RetagInfo::from_raw(size, perm_kind, protector_kind) };
}
+ println!("cargo:rerun-if-changed=build.rs"); +} diff --git a/src/tools/bsan/bsan-shared/src/lib.rs b/src/tools/bsan/bsan-shared/src/lib.rs new file mode 100644 index 0000000000000..818a3925fe022 --- /dev/null +++ b/src/tools/bsan/bsan-shared/src/lib.rs @@ -0,0 +1,3 @@ +#![cfg_attr(not(test), no_std)] +mod perms; +pub use perms::*; diff --git a/src/tools/bsan/bsan-shared/src/perms.rs b/src/tools/bsan/bsan-shared/src/perms.rs new file mode 100644 index 0000000000000..39cb9745b5cfe --- /dev/null +++ b/src/tools/bsan/bsan-shared/src/perms.rs @@ -0,0 +1,212 @@ +use core::cmp::Ordering; +use core::cmp::Ordering::*; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct RetagInfo { + pub size: usize, + pub perm_kind: Permission, + pub protector_kind: ProtectorKind, +} + +impl RetagInfo { + #[inline] + pub fn new(size: usize, perm_kind: Permission, protector_kind: ProtectorKind) -> Self { + Self { size, perm_kind, protector_kind } + } + + pub unsafe fn from_raw(size: usize, perm_kind: u8, protector_kind: u8) -> Self { + let perm_kind = unsafe { Permission::from_raw(perm_kind) }; + let protector_kind = unsafe { ProtectorKind::from_raw(protector_kind) }; + Self::new(size, perm_kind, protector_kind) + } +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ProtectorKind { + NoProtector, + /// Protected against aliasing violations from other pointers. + /// + /// Items protected like this cause UB when they are invalidated, *but* the pointer itself may + /// still be used to issue a deallocation. + /// + /// This is required for LLVM IR pointers that are `noalias` but *not* `dereferenceable`. + WeakProtector, + + /// Protected against any kind of invalidation. + /// + /// Items protected like this cause UB when they are invalidated or the memory is deallocated. + /// This is strictly stronger protection than `WeakProtector`. + /// + /// This is required for LLVM IR pointers that are `dereferenceable` (and also allows `noalias`). 
+ StrongProtector, +} + +impl ProtectorKind { + unsafe fn from_raw(protector_kind: u8) -> Self { + unsafe { core::mem::transmute::(protector_kind) } + } +} + +/// The activation states of a pointer. +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum PermissionPriv { + /// represents: a local mutable reference that has not yet been written to; + /// allows: child reads, foreign reads; + /// affected by: child writes (becomes Active), + /// rejects: foreign writes (Disabled). + /// + /// `ReservedFrz` is mostly for types that are `Freeze` (no interior mutability). + /// If the type has interior mutability, see `ReservedIM` instead. + /// (Note: since the discovery of `tests/fail/tree_borrows/reservedim_spurious_write.rs`, + /// we also use `ReservedFrz` for mutable references that were retagged with a protector + /// independently of interior mutability) + /// + /// special case: behaves differently when protected, which is where `conflicted` + /// is relevant + /// - `conflicted` is set on foreign reads, + /// - `conflicted` must not be set on child writes (there is UB otherwise). + /// + /// This is so that the behavior of `Reserved` adheres to the rules of `noalias`: + /// - foreign-read then child-write is UB due to `conflicted`, + /// - child-write then foreign-read is UB since child-write will activate and then + /// foreign-read disables a protected `Active`, which is UB. + ReservedFrz, + + #[allow(dead_code)] + ReservedFrzConf, + /// Alternative version of `ReservedFrz` made for types with interior mutability. + /// allows: child reads, foreign reads, foreign writes (extra); + /// affected by: child writes (becomes Active); + /// rejects: nothing. + ReservedIM, + /// represents: a unique pointer; + /// allows: child reads, child writes; + /// rejects: foreign reads (Frozen), foreign writes (Disabled).
+ Active, + /// represents: a shared pointer; + /// allows: all read accesses; + /// rejects: child writes (UB), foreign writes (Disabled). + Frozen, + /// represents: a dead pointer; + /// allows: all foreign accesses; + /// rejects: all child accesses (UB). + Disabled, +} +use self::PermissionPriv::*; + +impl PartialOrd for PermissionPriv { + /// PermissionPriv is ordered by the reflexive transitive closure of + /// `Reserved(conflicted=false) < Reserved(conflicted=true) < Active < Frozen < Disabled`. + /// `Reserved` that have incompatible `ty_is_freeze` are incomparable to each other. + /// This ordering matches the reachability by transitions, as asserted by the exhaustive test + /// `permissionpriv_partialord_is_reachability`. + fn partial_cmp(&self, other: &Self) -> Option { + Some(match (self, other) { + (a, b) if a == b => Equal, + (Disabled, _) => Greater, + (_, Disabled) => Less, + (Frozen, _) => Greater, + (_, Frozen) => Less, + (Active, _) => Greater, + (_, Active) => Less, + (ReservedIM, ReservedIM) => Equal, + (ReservedFrz, ReservedFrz) => Equal, + (ReservedFrzConf, ReservedFrzConf) => Equal, + (ReservedFrz, ReservedFrzConf) => Less, + (ReservedFrzConf, ReservedFrz) => Greater, + // Versions of `Reserved` with different interior mutability are incomparable with each + // other. + (ReservedIM, ReservedFrz) | (ReservedFrz, ReservedIM) => return None, + (ReservedIM, ReservedFrzConf) | (ReservedFrzConf, ReservedIM) => return None, + }) + } +} + +impl PermissionPriv { + /// Check if `self` can be the initial state of a pointer. + fn is_initial(&self) -> bool { + matches!(self, ReservedFrz | Frozen | ReservedIM) + } + + /// Reject `ReservedIM` that cannot exist in the presence of a protector.
+ fn compatible_with_protector(&self) -> bool { + !matches!(self, ReservedIM) + } +} + +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + +pub struct Permission { + inner: PermissionPriv, +} +impl Permission { + #[inline] + pub unsafe fn from_raw(perm_priv: u8) -> Self { + let inner: PermissionPriv = + unsafe { core::mem::transmute::(perm_priv) }; + Self { inner } + } + + /// Check if `self` can be the initial state of a pointer. + pub fn is_initial(&self) -> bool { + self.inner.is_initial() + } + /// Check if `self` is the terminal state of a pointer (is `Disabled`). + pub fn is_disabled(&self) -> bool { + self.inner == Disabled + } + /// Check if `self` is the post-child-write state of a pointer (is `Active`). + pub fn is_active(&self) -> bool { + self.inner == Active + } + + /// Default initial permission of the root of a new tree at inbounds positions. + /// Must *only* be used for the root, this is not in general an "initial" permission! + pub fn new_active() -> Self { + Self { inner: Active } + } + + /// Default initial permission of a reborrowed mutable reference that is either + /// protected or not interior mutable. + fn new_reserved_frz() -> Self { + Self { inner: ReservedFrz } + } + + /// Default initial permission of an unprotected interior mutable reference. + fn new_reserved_im() -> Self { + Self { inner: ReservedIM } + } + + /// Wrapper around `new_reserved_frz` and `new_reserved_im` that decides + /// which to call based on the interior mutability and the retag kind (whether there + /// is a protector is relevant because being protected takes priority over being + /// interior mutable) + pub fn new_reserved(ty_is_freeze: bool, protected: bool) -> Self { + if ty_is_freeze || protected { Self::new_reserved_frz() } else { Self::new_reserved_im() } + } + + /// Default initial permission of a reborrowed shared reference. 
+ pub fn new_frozen() -> Self { + Self { inner: Frozen } + } + + /// Default initial permission of the root of a new tree at out-of-bounds positions. + /// Must *only* be used for the root, this is not in general an "initial" permission! + pub fn new_disabled() -> Self { + Self { inner: Disabled } + } + + /// Reject `ReservedIM` that cannot exist in the presence of a protector. + pub fn compatible_with_protector(&self) -> bool { + self.inner.compatible_with_protector() + } +} + +impl Into for Permission { + fn into(self) -> u8 { + self.inner as u8 + } +}