diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index dcce866bac..1a80dcbb64 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -211,6 +211,9 @@ pub struct Options { pub zero_initialize_workgroup_memory: bool, /// Should we restrict indexing of vectors, matrices and arrays? pub restrict_indexing: bool, + /// If set, loops will have code injected into them, forcing the compiler + /// to think the number of iterations is bounded. + pub force_loop_bounding: bool, } impl Default for Options { @@ -223,6 +226,7 @@ impl Default for Options { push_constants_target: None, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, } } } diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index b5df135766..6914869969 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -131,6 +131,33 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.clear(); } + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. + /// + /// See [`back::msl::Writer::gen_force_bounded_loop_statements`] for details. + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + ) -> Option<(String, String)> { + if !self.options.force_loop_bounding { + return None; + } + + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u, 0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if (all({loop_bound_name} == uint2({max}u, {max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); + + Some((decl, break_and_inc)) + } + /// Helper method used to find which expressions of a given function require baking /// /// # Notes @@ -2048,12 +2075,24 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + let force_loop_bound_statements = self.gen_force_bounded_loop_statements(level); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { + writeln!(self.out, "{level}bool {gate_name} = true;")?; + } + self.continue_ctx.enter_loop(); + writeln!(self.out, "{level}while(true) {{")?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } let l2 = level.next(); - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); - writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{")?; + if let Some(gate_name) = gate_name { writeln!(self.out, "{l2}if (!{gate_name}) {{")?; let l3 = l2.next(); for sta in continuing.iter() { @@ -2068,13 +2107,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } writeln!(self.out, "{l2}}}")?; writeln!(self.out, "{l2}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{")?; } for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } + writeln!(self.out, "{level}}}")?; self.continue_ctx.exit_loop(); } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index cdfd62129b..396e898083 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -383,11 +383,6 @@ pub struct Writer { /// Set of (struct type, struct field index) denoting which fields require /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - - /// Name of the force-bounded-loop macro. - /// - /// See `emit_force_bounded_loop_macro` for details. - force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -601,7 +596,7 @@ struct ExpressionContext<'a> { /// accesses. These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, - /// See [`Writer::emit_force_bounded_loop_macro`] for details. + /// See [`Writer::gen_force_bounded_loop_statements`] for details. force_loop_bounding: bool, } @@ -685,7 +680,6 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - force_bounded_loop_macro_name: String::default(), } } @@ -696,17 +690,11 @@ impl Writer { self.out } - /// Define a macro to invoke at the bottom of each loop body, to - /// defeat MSL infinite loop reasoning. - /// - /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked at the end of each loop body in the generated MSL, - /// to ensure that the MSL compiler's optimizations do not remove bounds - /// checks. - /// - /// Only the first call to this function for a given module actually causes - /// the macro definition to be written. Subsequent loops can simply use the - /// prior macro definition, since macros aren't block-scoped. + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat MSL infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. /// /// # What is this trying to solve? /// @@ -774,7 +762,8 @@ impl Writer { /// but which in fact generates no instructions. Unfortunately, inline /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we add the following code to the bottom of every loop: + /// A previously used approach was to add the following code to the bottom + /// of every loop: /// /// ```ignore /// if (volatile bool unpredictable = false; unpredictable) @@ -785,37 +774,47 @@ impl Writer { /// the `volatile` qualifier prevents the compiler from assuming this. Thus, /// it must assume that the `break` might be reached, and hence that the /// loop is not unbounded. This prevents the range analysis impact described - /// above. + /// above. Unfortunately this prevented the compiler from making important, + /// and safe, optimizations such as loop unrolling and was observed to + /// significantly hurt performance. /// - /// Unfortunately, what makes this a kludge, not a hack, is that this - /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, in every iteration of the loop. There's no part of the system - /// that has a global enough view to be sure that `unpredictable` is true, - /// and remove it from the code. Adding the branch also affects - /// optimization: for example, it's impossible to unroll this loop. This - /// transformation has been observed to significantly hurt performance. + /// Our current approach declares a counter before every loop and + /// increments it every iteration, breaking after 2^64 iterations: + /// + /// ```ignore + /// uint2 loop_bound = uint2(0); + /// while (true) { + /// if (metal::all(loop_bound == uint2(4294967295))) { break; } + /// loop_bound += uint2(loop_bound.y == 4294967295, 1); + /// } + /// ``` /// - /// To make our output a bit more legible, we pull the condition out into a - /// preprocessor macro defined at the top of the module. + /// This convinces the compiler that the loop is finite and therefore may + /// execute, whilst at the same time allowing optimizations such as loop + /// unrolling. Furthermore the 64-bit counter is large enough it seems + /// implausible that it would affect the execution of any shader. /// /// This approach is also used by Chromium WebGPU's Dawn shader compiler: - /// - fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { - if !self.force_bounded_loop_macro_name.is_empty() { - return Ok(()); + /// + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + context: &StatementContext, + ) -> Option<(String, String)> { + if !context.expression.force_loop_bounding { + return None; } - self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); - let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); - writeln!( - self.out, - "#define {} {{ volatile bool {} = false; if ({}) break; }}", - self.force_bounded_loop_macro_name, - loop_bounded_volatile_name, - loop_bounded_volatile_name, - )?; + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if ({NAMESPACE}::all({loop_bound_name} == uint2({max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); - Ok(()) + Some((decl, break_and_inc)) } fn put_call_parameters( @@ -3201,10 +3200,23 @@ impl Writer { ref continuing, break_if, } => { - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); + let force_loop_bound_statements = + self.gen_force_bounded_loop_statements(level, context); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{",)?; + } + + writeln!(self.out, "{level}while(true) {{",)?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } + if let Some(ref gate_name) = gate_name { let lif = level.next(); let lcontinuing = lif.next(); writeln!(self.out, "{lif}if (!{gate_name}) {{")?; @@ -3218,19 +3230,9 @@ impl Writer { } writeln!(self.out, "{lif}}}")?; writeln!(self.out, "{lif}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; - if context.expression.force_loop_bounding { - self.emit_force_bounded_loop_macro()?; - writeln!( - self.out, - "{}{}", - level.next(), - self.force_bounded_loop_macro_name - )?; - } + writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3724,7 +3726,6 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( diff --git a/naga/tests/out/hlsl/boids.hlsl b/naga/tests/out/hlsl/boids.hlsl index 22e9c6cefd..8934a9bca2 100644 --- a/naga/tests/out/hlsl/boids.hlsl +++ b/naga/tests/out/hlsl/boids.hlsl @@ -41,8 +41,11 @@ void main(uint3 global_invocation_id : SV_DispatchThreadID) vPos = _e8; float2 _e14 = asfloat(particlesSrc.Load2(8+index*16+0)); vVel = _e14; + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = (_e91 + 1u); diff --git a/naga/tests/out/hlsl/break-if.hlsl b/naga/tests/out/hlsl/break-if.hlsl index 63a0185583..cb10886543 100644 --- a/naga/tests/out/hlsl/break-if.hlsl +++ b/naga/tests/out/hlsl/break-if.hlsl @@ -1,7 +1,10 @@ void breakIfEmpty() { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; @@ -17,8 +20,11 @@ void breakIfEmptyBody(bool a) bool b = (bool)0; bool c = (bool)0; + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,8 +44,11 @@ void breakIf(bool a_1) bool d = (bool)0; bool e = (bool)0; + uint2 loop_bound_2 = uint2(0u, 0u); bool loop_init_2 = true; while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if ((a_1 == _e5)) { @@ -58,8 +67,11 @@ void breakIfSeparateVariable() { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u, 0u); bool loop_init_3 = true; while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if ((_e5 == 5u)) { diff --git a/naga/tests/out/hlsl/collatz.hlsl b/naga/tests/out/hlsl/collatz.hlsl index b00586aa4c..3a250a4f25 100644 --- a/naga/tests/out/hlsl/collatz.hlsl +++ b/naga/tests/out/hlsl/collatz.hlsl @@ -6,7 +6,10 @@ uint collatz_iterations(uint n_base) uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if ((_e4 > 1u)) { } else { diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 2438858a8a..7a8cb73779 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -20,7 +20,10 @@ void switch_case_break() void loop_switch_continue(int x) { + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool should_continue = false; switch(x) { case 1: { @@ -40,7 +43,10 @@ void loop_switch_continue(int x) void loop_switch_continue_nesting(int x_1, int y, int z) { + uint2 loop_bound_1 = uint2(0u, 0u); while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool should_continue_1 = false; switch(x_1) { case 1: { @@ -54,7 +60,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) break; } default: { + uint2 loop_bound_2 = uint2(0u, 0u); while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); bool should_continue_2 = false; switch(z) { case 1: { @@ -93,7 +102,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) continue; } } + uint2 loop_bound_3 = uint2(0u, 0u); while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool should_continue_4 = false; do { do { @@ -115,7 +127,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u, 0u); while(true) { + if (all(loop_bound_4 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); bool should_continue_5 = false; switch(x_2) { case 1: { @@ -127,7 +142,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) } } } + uint2 loop_bound_5 = uint2(0u, 0u); while(true) { + if (all(loop_bound_5 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); bool should_continue_6 = false; switch(x_2) { case 1: { diff --git a/naga/tests/out/hlsl/do-while.hlsl b/naga/tests/out/hlsl/do-while.hlsl index ca7d42e1e7..659624da79 100644 --- a/naga/tests/out/hlsl/do-while.hlsl +++ b/naga/tests/out/hlsl/do-while.hlsl @@ -1,7 +1,10 @@ void fb1_(inout bool cond) { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(_e1)) { diff --git a/naga/tests/out/hlsl/ray-query.hlsl b/naga/tests/out/hlsl/ray-query.hlsl index 9a0a2da1ce..4815d27c9a 100644 --- a/naga/tests/out/hlsl/ray-query.hlsl +++ b/naga/tests/out/hlsl/ray-query.hlsl @@ -84,7 +84,10 @@ RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructu RayQuery rq_1; rq_1.TraceRayInline(acs, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir))); + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); const bool _e9 = rq_1.Proceed(); if (_e9) { } else { diff --git a/naga/tests/out/hlsl/shadow.hlsl b/naga/tests/out/hlsl/shadow.hlsl index c0431bfef9..18cc91992b 100644 --- a/naga/tests/out/hlsl/shadow.hlsl +++ b/naga/tests/out/hlsl/shadow.hlsl @@ -92,8 +92,11 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 uint i = 0u; float3 normal_1 = normalize(in_.world_normal); + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = (_e40 + 1u); @@ -128,8 +131,11 @@ float4 fs_main_without_storage(FragmentInput_fs_main_without_storage fragmentinp uint i_1 = 0u; float3 normal_2 = normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = (_e40 + 1u); diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 6655fad7e2..633f3226e0 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,8 +76,11 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e27 = i; i = _e27 + 1u; @@ -93,7 +96,10 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool _e12 = exchanged; if (!(_e12)) { } else { @@ -108,11 +114,8 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } @@ -124,8 +127,11 @@ kernel void test_atomic_compare_exchange_u32_( uint i_1 = 0u; uint old_1 = {}; bool exchanged_1 = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_1) { uint _e27 = i_1; i_1 = _e27 + 1u; @@ -141,7 +147,10 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool _e12 = exchanged_1; if (!(_e12)) { } else { @@ -156,10 +165,8 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index a4d709bd09..f278d3ad68 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,8 +55,11 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = _e91 + 1u; @@ -105,8 +108,6 @@ kernel void main_( int _e88 = cVelCount; cVelCount = as_type(as_type(_e88) + as_type(1)); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 4d3397234b..0336ac1b4c 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,16 +7,17 @@ using metal::uint; void breakIfEmpty( ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; } } loop_init = false; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -26,8 +27,11 @@ void breakIfEmptyBody( ) { bool b = {}; bool c = {}; + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,7 +42,6 @@ void breakIfEmptyBody( } } loop_init_1 = false; - LOOP_IS_BOUNDED } return; } @@ -48,8 +51,11 @@ void breakIf( ) { bool d = {}; bool e = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_2 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if (a_1 == e) { @@ -60,7 +66,6 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; - LOOP_IS_BOUNDED } return; } @@ -68,8 +73,11 @@ void breakIf( void breakIfSeparateVariable( ) { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u); bool loop_init_3 = true; while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if (counter == 5u) { @@ -79,7 +87,6 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index e282d13abb..eef2c83c34 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,7 +19,10 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if (_e4 > 1u) { } else { @@ -37,8 +40,6 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 1b35249f36..3c73656af7 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,7 +31,10 @@ void switch_case_break( void loop_switch_continue( int x ) { + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); switch(x) { case 1: { continue; @@ -40,8 +43,6 @@ void loop_switch_continue( break; } } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -51,7 +52,10 @@ void loop_switch_continue_nesting( int y, int z ) { + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); switch(x_1) { case 1: { continue; @@ -62,7 +66,10 @@ void loop_switch_continue_nesting( continue; } default: { + uint2 loop_bound_2 = uint2(0u); while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); switch(z) { case 1: { continue; @@ -71,7 +78,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } break; } @@ -87,9 +93,11 @@ void loop_switch_continue_nesting( continue; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); switch(y) { case 1: default: { @@ -101,7 +109,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } return; } @@ -113,7 +120,10 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u); while(true) { + if (metal::all(loop_bound_4 == uint2(4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); switch(x_2) { case 1: { pos_1 = 1; @@ -123,9 +133,11 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_5 = uint2(0u); while(true) { + if (metal::all(loop_bound_5 == uint2(4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); switch(x_2) { case 1: { break; @@ -154,7 +166,6 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index 2a883304d1..af55bfcc56 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,8 +8,11 @@ using metal::uint; void fb1_( thread bool& cond ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(cond)) { @@ -18,8 +21,6 @@ void fb1_( } loop_init = false; continue; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index 3aa0ee0359..d70011159b 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,15 +33,16 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e31 = rq.ready; rq.ready = false; if (_e31) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index b8230fb2e8..58927b1f12 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -53,15 +53,16 @@ RayIntersection query_loop( rq_1.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq_1.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq_1.intersection = rq_1.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq_1.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e9 = rq_1.ready; rq_1.ready = false; if (_e9) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return RayIntersection {_map_intersection_type(rq_1.intersection.type), rq_1.intersection.distance, rq_1.intersection.user_instance_id, rq_1.intersection.instance_id, {}, rq_1.intersection.geometry_id, rq_1.intersection.primitive_id, rq_1.intersection.triangle_barycentric_coord, rq_1.intersection.triangle_front_facing, {}, rq_1.intersection.object_to_world_transform, rq_1.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index 18cc842110..c8ad03b3a5 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,8 +100,11 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = _e40 + 1u; @@ -123,8 +126,6 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,8 +153,11 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 color_1 = c_ambient; uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = _e40 + 1u; @@ -175,7 +179,6 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } - LOOP_IS_BOUNDED } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color; diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index b9a825845a..5c99b038e7 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -280,12 +280,15 @@ impl super::Device { let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory - || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing; + || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing + || stage.module.runtime_checks.force_loop_bounding + != layout.naga_options.force_loop_bounding; let mut temp_options; let naga_options = if needs_temp_options { temp_options = layout.naga_options.clone(); temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; temp_options.restrict_indexing = stage.module.runtime_checks.bounds_checks; + temp_options.force_loop_bounding = stage.module.runtime_checks.force_loop_bounding; &temp_options } else { &layout.naga_options @@ -1241,6 +1244,7 @@ impl crate::Device for super::Device { push_constants_target, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, }, }) }