From 2976158fe1cee65a342cc2ab28ddb2c940a643df Mon Sep 17 00:00:00 2001 From: Jamie Nicol Date: Mon, 13 Jan 2025 15:37:39 +0000 Subject: [PATCH] [naga msl-out hlsl-out] Improve workaround for infinite loops causing undefined behaviour We must ensure that all loops emitted by the naga backends will terminate, in order to avoid undefined behaviour. This was previously implemented for the msl backend in 6545. However, the usage of `volatile` prevents the compiler from making other important optimizations. This patch improves the msl workaround and additionally implements it for hlsl. The spv implementation will be left for a follow up. Rather than using volatile, this patch increments a counter on every loop iteration, breaking from the loop after 2^64 iterations. This ensures the compiler treats the loop as finite thereby avoiding undefined behaviour, whilst at the same time allowing for other optimizations and in reality not actually affecting execution. --- naga/src/back/hlsl/mod.rs | 4 + naga/src/back/hlsl/writer.rs | 50 +++++++- naga/src/back/msl/writer.rs | 117 ++++++++++--------- naga/tests/out/hlsl/boids.hlsl | 3 + naga/tests/out/hlsl/break-if.hlsl | 12 ++ naga/tests/out/hlsl/collatz.hlsl | 3 + naga/tests/out/hlsl/control-flow.hlsl | 18 +++ naga/tests/out/hlsl/do-while.hlsl | 3 + naga/tests/out/hlsl/ray-query.hlsl | 3 + naga/tests/out/hlsl/shadow.hlsl | 6 + naga/tests/out/msl/atomicCompareExchange.msl | 17 ++- naga/tests/out/msl/boids.msl | 5 +- naga/tests/out/msl/break-if.msl | 17 ++- naga/tests/out/msl/collatz.msl | 5 +- naga/tests/out/msl/control-flow.msl | 25 ++-- naga/tests/out/msl/do-while.msl | 5 +- naga/tests/out/msl/overrides-ray-query.msl | 5 +- naga/tests/out/msl/ray-query.msl | 5 +- naga/tests/out/msl/shadow.msl | 9 +- wgpu-hal/src/dx12/device.rs | 6 +- 20 files changed, 223 insertions(+), 95 deletions(-) diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index dcce866bac..1a80dcbb64 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -211,6 +211,9 @@ pub struct Options { pub zero_initialize_workgroup_memory: bool, /// Should we restrict indexing of vectors, matrices and arrays? pub restrict_indexing: bool, + /// If set, loops will have code injected into them, forcing the compiler + /// to think the number of iterations is bounded. + pub force_loop_bounding: bool, } impl Default for Options { @@ -223,6 +226,7 @@ impl Default for Options { push_constants_target: None, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, } } } diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index b5df135766..6914869969 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -131,6 +131,33 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.clear(); } + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. + /// + /// See [`back::msl::Writer::gen_force_bounded_loop_statements`] for details. + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + ) -> Option<(String, String)> { + if !self.options.force_loop_bounding { + return None; + } + + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u, 0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if (all({loop_bound_name} == uint2({max}u, {max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); + + Some((decl, break_and_inc)) + } + /// Helper method used to find which expressions of a given function require baking /// /// # Notes @@ -2048,12 +2075,24 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + let force_loop_bound_statements = self.gen_force_bounded_loop_statements(level); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { + writeln!(self.out, "{level}bool {gate_name} = true;")?; + } + self.continue_ctx.enter_loop(); + writeln!(self.out, "{level}while(true) {{")?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } let l2 = level.next(); - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); - writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{")?; + if let Some(gate_name) = gate_name { writeln!(self.out, "{l2}if (!{gate_name}) {{")?; let l3 = l2.next(); for sta in continuing.iter() { @@ -2068,13 +2107,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } writeln!(self.out, "{l2}}}")?; writeln!(self.out, "{l2}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{")?; } for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } + writeln!(self.out, "{level}}}")?; self.continue_ctx.exit_loop(); } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index cdfd62129b..396e898083 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -383,11 +383,6 @@ pub struct Writer { /// Set of (struct type, struct field index) denoting which fields require /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - - /// Name of the force-bounded-loop macro. - /// - /// See `emit_force_bounded_loop_macro` for details. - force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -601,7 +596,7 @@ struct ExpressionContext<'a> { /// accesses. These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, - /// See [`Writer::emit_force_bounded_loop_macro`] for details. + /// See [`Writer::gen_force_bounded_loop_statements`] for details. force_loop_bounding: bool, } @@ -685,7 +680,6 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - force_bounded_loop_macro_name: String::default(), } } @@ -696,17 +690,11 @@ impl Writer { self.out } - /// Define a macro to invoke at the bottom of each loop body, to - /// defeat MSL infinite loop reasoning. - /// - /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked at the end of each loop body in the generated MSL, - /// to ensure that the MSL compiler's optimizations do not remove bounds - /// checks. - /// - /// Only the first call to this function for a given module actually causes - /// the macro definition to be written. Subsequent loops can simply use the - /// prior macro definition, since macros aren't block-scoped. + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat MSL infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. /// /// # What is this trying to solve? /// @@ -774,7 +762,8 @@ impl Writer { /// but which in fact generates no instructions. Unfortunately, inline /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we add the following code to the bottom of every loop: + /// A previously used approach was to add the following code to the bottom + /// of every loop: /// /// ```ignore /// if (volatile bool unpredictable = false; unpredictable) @@ -785,37 +774,47 @@ impl Writer { /// the `volatile` qualifier prevents the compiler from assuming this. Thus, /// it must assume that the `break` might be reached, and hence that the /// loop is not unbounded. This prevents the range analysis impact described - /// above. + /// above. Unfortunately this prevented the compiler from making important, + /// and safe, optimizations such as loop unrolling and was observed to + /// significantly hurt performance. /// - /// Unfortunately, what makes this a kludge, not a hack, is that this - /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, in every iteration of the loop. There's no part of the system - /// that has a global enough view to be sure that `unpredictable` is true, - /// and remove it from the code. Adding the branch also affects - /// optimization: for example, it's impossible to unroll this loop. This - /// transformation has been observed to significantly hurt performance. + /// Our current approach declares a counter before every loop and + /// increments it every iteration, breaking after 2^64 iterations: + /// + /// ```ignore + /// uint2 loop_bound = uint2(0); + /// while (true) { + /// if (metal::all(loop_bound == uint2(4294967295))) { break; } + /// loop_bound += uint2(loop_bound.y == 4294967295, 1); + /// } + /// ``` /// - /// To make our output a bit more legible, we pull the condition out into a - /// preprocessor macro defined at the top of the module. + /// This convinces the compiler that the loop is finite and therefore may + /// execute, whilst at the same time allowing optimizations such as loop + /// unrolling. Furthermore the 64-bit counter is large enough it seems + /// implausible that it would affect the execution of any shader. /// /// This approach is also used by Chromium WebGPU's Dawn shader compiler: - /// - fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { - if !self.force_bounded_loop_macro_name.is_empty() { - return Ok(()); + /// + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + context: &StatementContext, + ) -> Option<(String, String)> { + if !context.expression.force_loop_bounding { + return None; } - self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); - let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); - writeln!( - self.out, - "#define {} {{ volatile bool {} = false; if ({}) break; }}", - self.force_bounded_loop_macro_name, - loop_bounded_volatile_name, - loop_bounded_volatile_name, - )?; + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if ({NAMESPACE}::all({loop_bound_name} == uint2({max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); - Ok(()) + Some((decl, break_and_inc)) } fn put_call_parameters( @@ -3201,10 +3200,23 @@ impl Writer { ref continuing, break_if, } => { - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); + let force_loop_bound_statements = + self.gen_force_bounded_loop_statements(level, context); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{",)?; + } + + writeln!(self.out, "{level}while(true) {{",)?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } + if let Some(ref gate_name) = gate_name { let lif = level.next(); let lcontinuing = lif.next(); writeln!(self.out, "{lif}if (!{gate_name}) {{")?; @@ -3218,19 +3230,9 @@ impl Writer { } writeln!(self.out, "{lif}}}")?; writeln!(self.out, "{lif}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; - if context.expression.force_loop_bounding { - self.emit_force_bounded_loop_macro()?; - writeln!( - self.out, - "{}{}", - level.next(), - self.force_bounded_loop_macro_name - )?; - } + writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3724,7 +3726,6 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( diff --git a/naga/tests/out/hlsl/boids.hlsl b/naga/tests/out/hlsl/boids.hlsl index 22e9c6cefd..8934a9bca2 100644 --- a/naga/tests/out/hlsl/boids.hlsl +++ b/naga/tests/out/hlsl/boids.hlsl @@ -41,8 +41,11 @@ void main(uint3 global_invocation_id : SV_DispatchThreadID) vPos = _e8; float2 _e14 = asfloat(particlesSrc.Load2(8+index*16+0)); vVel = _e14; + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = (_e91 + 1u); diff --git a/naga/tests/out/hlsl/break-if.hlsl b/naga/tests/out/hlsl/break-if.hlsl index 63a0185583..cb10886543 100644 --- a/naga/tests/out/hlsl/break-if.hlsl +++ b/naga/tests/out/hlsl/break-if.hlsl @@ -1,7 +1,10 @@ void breakIfEmpty() { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; @@ -17,8 +20,11 @@ void breakIfEmptyBody(bool a) bool b = (bool)0; bool c = (bool)0; + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,8 +44,11 @@ void breakIf(bool a_1) bool d = (bool)0; bool e = (bool)0; + uint2 loop_bound_2 = uint2(0u, 0u); bool loop_init_2 = true; while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if ((a_1 == _e5)) { @@ -58,8 +67,11 @@ void breakIfSeparateVariable() { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u, 0u); bool loop_init_3 = true; while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if ((_e5 == 5u)) { diff --git a/naga/tests/out/hlsl/collatz.hlsl b/naga/tests/out/hlsl/collatz.hlsl index b00586aa4c..3a250a4f25 100644 --- a/naga/tests/out/hlsl/collatz.hlsl +++ b/naga/tests/out/hlsl/collatz.hlsl @@ -6,7 +6,10 @@ uint collatz_iterations(uint n_base) uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if ((_e4 > 1u)) { } else { diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 2438858a8a..7a8cb73779 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -20,7 +20,10 @@ void switch_case_break() void loop_switch_continue(int x) { + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool should_continue = false; switch(x) { case 1: { @@ -40,7 +43,10 @@ void loop_switch_continue(int x) void loop_switch_continue_nesting(int x_1, int y, int z) { + uint2 loop_bound_1 = uint2(0u, 0u); while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool should_continue_1 = false; switch(x_1) { case 1: { @@ -54,7 +60,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) break; } default: { + uint2 loop_bound_2 = uint2(0u, 0u); while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); bool should_continue_2 = false; switch(z) { case 1: { @@ -93,7 +102,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) continue; } } + uint2 loop_bound_3 = uint2(0u, 0u); while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool should_continue_4 = false; do { do { @@ -115,7 +127,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u, 0u); while(true) { + if (all(loop_bound_4 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); bool should_continue_5 = false; switch(x_2) { case 1: { @@ -127,7 +142,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) } } } + uint2 loop_bound_5 = uint2(0u, 0u); while(true) { + if (all(loop_bound_5 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); bool should_continue_6 = false; switch(x_2) { case 1: { diff --git a/naga/tests/out/hlsl/do-while.hlsl b/naga/tests/out/hlsl/do-while.hlsl index ca7d42e1e7..659624da79 100644 --- a/naga/tests/out/hlsl/do-while.hlsl +++ b/naga/tests/out/hlsl/do-while.hlsl @@ -1,7 +1,10 @@ void fb1_(inout bool cond) { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(_e1)) { diff --git a/naga/tests/out/hlsl/ray-query.hlsl b/naga/tests/out/hlsl/ray-query.hlsl index 9a0a2da1ce..4815d27c9a 100644 --- a/naga/tests/out/hlsl/ray-query.hlsl +++ b/naga/tests/out/hlsl/ray-query.hlsl @@ -84,7 +84,10 @@ RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructu RayQuery rq_1; rq_1.TraceRayInline(acs, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir))); + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); const bool _e9 = rq_1.Proceed(); if (_e9) { } else { diff --git a/naga/tests/out/hlsl/shadow.hlsl b/naga/tests/out/hlsl/shadow.hlsl index c0431bfef9..18cc91992b 100644 --- a/naga/tests/out/hlsl/shadow.hlsl +++ b/naga/tests/out/hlsl/shadow.hlsl @@ -92,8 +92,11 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 uint i = 0u; float3 normal_1 = normalize(in_.world_normal); + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = (_e40 + 1u); @@ -128,8 +131,11 @@ float4 fs_main_without_storage(FragmentInput_fs_main_without_storage fragmentinp uint i_1 = 0u; float3 normal_2 = normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = (_e40 + 1u); diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 6655fad7e2..633f3226e0 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,8 +76,11 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e27 = i; i = _e27 + 1u; @@ -93,7 +96,10 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool _e12 = exchanged; if (!(_e12)) { } else { @@ -108,11 +114,8 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } @@ -124,8 +127,11 @@ kernel void test_atomic_compare_exchange_u32_( uint i_1 = 0u; uint old_1 = {}; bool exchanged_1 = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_1) { uint _e27 = i_1; i_1 = _e27 + 1u; @@ -141,7 +147,10 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool _e12 = exchanged_1; if (!(_e12)) { } else { @@ -156,10 +165,8 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index a4d709bd09..f278d3ad68 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,8 +55,11 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = _e91 + 1u; @@ -105,8 +108,6 @@ kernel void main_( int _e88 = cVelCount; cVelCount = as_type(as_type(_e88) + as_type(1)); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 4d3397234b..0336ac1b4c 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,16 +7,17 @@ using metal::uint; void breakIfEmpty( ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; } } loop_init = false; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -26,8 +27,11 @@ void breakIfEmptyBody( ) { bool b = {}; bool c = {}; + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,7 +42,6 @@ void breakIfEmptyBody( } } loop_init_1 = false; - LOOP_IS_BOUNDED } return; } @@ -48,8 +51,11 @@ void breakIf( ) { bool d = {}; bool e = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_2 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if (a_1 == e) { @@ -60,7 +66,6 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; - LOOP_IS_BOUNDED } return; } @@ -68,8 +73,11 @@ void breakIf( void breakIfSeparateVariable( ) { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u); bool loop_init_3 = true; while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if (counter == 5u) { @@ -79,7 +87,6 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index e282d13abb..eef2c83c34 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,7 +19,10 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if (_e4 > 1u) { } else { @@ -37,8 +40,6 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 1b35249f36..3c73656af7 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,7 +31,10 @@ void switch_case_break( void loop_switch_continue( int x ) { + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); switch(x) { case 1: { continue; @@ -40,8 +43,6 @@ void loop_switch_continue( break; } } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -51,7 +52,10 @@ void loop_switch_continue_nesting( int y, int z ) { + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); switch(x_1) { case 1: { continue; @@ -62,7 +66,10 @@ void loop_switch_continue_nesting( continue; } default: { + uint2 loop_bound_2 = uint2(0u); while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); switch(z) { case 1: { continue; @@ -71,7 +78,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } break; } @@ -87,9 +93,11 @@ void loop_switch_continue_nesting( continue; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); switch(y) { case 1: default: { @@ -101,7 +109,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } return; } @@ -113,7 +120,10 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u); while(true) { + if (metal::all(loop_bound_4 == uint2(4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); switch(x_2) { case 1: { pos_1 = 1; @@ -123,9 +133,11 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_5 = uint2(0u); while(true) { + if (metal::all(loop_bound_5 == uint2(4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); switch(x_2) { case 1: { break; @@ -154,7 +166,6 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index 2a883304d1..af55bfcc56 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,8 +8,11 @@ using metal::uint; void fb1_( thread bool& cond ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(cond)) { @@ -18,8 +21,6 @@ void fb1_( } loop_init = false; continue; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index 3aa0ee0359..d70011159b 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,15 +33,16 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e31 = rq.ready; rq.ready = false; if (_e31) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index b8230fb2e8..58927b1f12 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -53,15 +53,16 @@ RayIntersection query_loop( rq_1.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq_1.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq_1.intersection = rq_1.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq_1.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e9 = rq_1.ready; rq_1.ready = false; if (_e9) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return RayIntersection {_map_intersection_type(rq_1.intersection.type), rq_1.intersection.distance, rq_1.intersection.user_instance_id, rq_1.intersection.instance_id, {}, rq_1.intersection.geometry_id, rq_1.intersection.primitive_id, rq_1.intersection.triangle_barycentric_coord, rq_1.intersection.triangle_front_facing, {}, rq_1.intersection.object_to_world_transform, rq_1.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index 18cc842110..c8ad03b3a5 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,8 +100,11 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = _e40 + 1u; @@ -123,8 +126,6 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,8 +153,11 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 color_1 = c_ambient; uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = _e40 + 1u; @@ -175,7 +179,6 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } - LOOP_IS_BOUNDED } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color; diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index b9a825845a..5c99b038e7 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -280,12 +280,15 @@ impl super::Device { let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory - || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing; + || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing + || stage.module.runtime_checks.force_loop_bounding + != layout.naga_options.force_loop_bounding; let mut temp_options; let naga_options = if needs_temp_options { temp_options = layout.naga_options.clone(); temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; temp_options.restrict_indexing = stage.module.runtime_checks.bounds_checks; + temp_options.force_loop_bounding = stage.module.runtime_checks.force_loop_bounding; &temp_options } else { &layout.naga_options @@ -1241,6 +1244,7 @@ impl crate::Device for super::Device { push_constants_target, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, }, }) }