From 445aebe7630f2c65a48aed6cfc8e558c4a2be456 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 27 May 2026 10:16:58 -0700 Subject: [PATCH 1/3] Make areConsecutiveInputsEqual more precise Collect the interfering effects specifically from the expressions that are evaluated in between the LHS and RHS fallthrough values. The previous code overapproximated this by collecting all effects from the LHS and RHS expressions, whether or not the effects were due to subexpressions that would have been evaluated before or after the LHS and RHS values. Also use a direction-aware effect ordering check. --- src/passes/OptimizeInstructions.cpp | 124 ++-- .../optimize-instructions-struct-rmw.wast | 674 +++++++++++++++++- 2 files changed, 740 insertions(+), 58 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index e61deed16aa..95c47e7752c 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2788,69 +2788,79 @@ struct OptimizeInstructions // simple peephole optimizations - all we care about is a single instruction // at a time, and its inputs). bool areConsecutiveInputsEqual(Expression* left, Expression* right) { - // When we look for a tee/get pair, we can consider the fallthrough values - // for the first, as the fallthrough happens last (however, we must use - // NoTeeBrIf as we do not want to look through the tee). We cannot do this - // on the second, however, as there could be effects in the middle. - // TODO: Use effects here perhaps. - left = - Properties::getFallthrough(left, - getPassOptions(), - *getModule(), - Properties::FallthroughBehavior::NoTeeBrIf); - if (areMatchingTeeAndGet(left, right)) { - return true; + // The fallthrough expression of `left` produces its value. That value may + // depend on effects from other non-fallthrough expressions in `left`, but + // those expressions are generally executed before the fallthrough value and + // will affect the values of `left` and `right` equally, so we can ignore + // them. The exceptions are `local.tee` instructions and br_if conditions, + // which execute after the fallthrough and might affect only the value of + // `right`. + EffectAnalyzer interferingEffects(getPassOptions(), *getModule()); + bool matchingTeeAndGet = false; + while (true) { + left = + Properties::getFallthrough(left, + getPassOptions(), + *getModule(), + Properties::FallthroughBehavior::NoTeeBrIf); + if (auto* tee = left->dynCast()) { + assert(tee->isTee()); + // If `right` reads directly from this local.tee, then we know their + // values are the same. We know no children of this tee will be executed + // after it, so we need not look for further effects. But there might be + // interfering sets in previous br_if conditions, so we cannot just + // return here. + if (areMatchingTeeAndGet(left, right)) { + matchingTeeAndGet = true; + left = getFallthrough(left); + break; + } + interferingEffects.visit(tee); + left = tee->value; + continue; + } + if (auto* br = left->dynCast(); br && br->condition) { + assert(br->value); + interferingEffects.walk(br->condition); + left = br->value; + continue; + } + // We have found the real fallthrough expression. + break; } - // Ignore extraneous things and compare them syntactically. We can also - // look at the full fallthrough for both sides now. - auto* originalLeft = left; - left = getFallthrough(left); - auto* originalRight = right; - right = getFallthrough(right); - if (!ExpressionAnalyzer::equal(left, right)) { - return false; + // We similarly want to find the fallthrough expression of `right`. But this + // time, it is the expressions that execute before, not after, the + // fallthrough that can affect its value. + while (true) { + auto* next = Properties::getImmediateFallthrough( + right, getPassOptions(), *getModule()); + if (next == right) { + // We have found the fallthrough expression. + break; + } + // Gather the effects of all the non-fallthrough children of the + // container. + for (auto* child : ChildIterator(right)) { + if (child == next) { + // Skip children that execute after the fallthrough value, such as + // br_if conditions. + break; + } + interferingEffects.walk(child); + } + right = next; } - // We must also not have non-fallthrough effects that invalidate us, such as - // this situation: - // - // (local.get $x) - // (block - // (local.set $x ..) - // (local.get $x) - // ) - // - // The fallthroughs are identical, but the set may cause us to read a - // different value. - if (originalRight != right) { - // TODO: We could be more precise here and ignore right itself in - // originalRightEffects. - auto originalRightEffects = effects(originalRight); - auto rightEffects = effects(right); - if (originalRightEffects.invalidates(rightEffects)) { - return false; - } + // We have both fallthrough expressions. See if they look the same. + if (!matchingTeeAndGet && !ExpressionAnalyzer::equal(left, right)) { + return false; } - // The same, with left, as we can have this situation: - // - // (local.tee $x ..) - // (something using $x) - // ) - // (something using $x) - // - // The fallthroughs are identical, but the tee may cause us to read a - // different value. - if (originalLeft != left) { - auto originalLeftEffects = effects(originalLeft); - // |left == right| here (we would have exited early, otherwise, above), so - // we could compute either. Compute |left| as it might have better cache - // locality. - auto leftEffects = effects(left); - if (originalLeftEffects.invalidates(leftEffects)) { - return false; - } + // They do look the same! Make sure nothing executed in between them can + // affect the value of `right` and make it different from `left`. + if (interferingEffects.orderedBefore(effects(right))) { + return false; } // To be equal, they must also be known to return the same result diff --git a/test/lit/passes/optimize-instructions-struct-rmw.wast b/test/lit/passes/optimize-instructions-struct-rmw.wast index d8ea93ef4b4..4f196b53b5a 100644 --- a/test/lit/passes/optimize-instructions-struct-rmw.wast +++ b/test/lit/passes/optimize-instructions-struct-rmw.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: wasm-opt %s -all --optimize-instructions --preserve-type-order -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --optimize-instructions --preserve-type-order -S -o - | filecheck %s (module ;; CHECK: (type $i32 (shared (struct (field (mut i32))))) @@ -1476,3 +1476,675 @@ ) ) ) + +(module + ;; Test the effect analysis in areConsecutiveInputsEqual by optimizing (or + ;; not) cmpxchg operations with matching expected and replacement operands. + ;; CHECK: (type $struct (shared (struct (field (mut i32))))) + (type $struct (shared (struct (field (mut i32))))) + ;; CHECK: (type $array (shared (array (mut i32)))) + (type $array (shared (array (mut i32)))) + + ;; CHECK: (func $release-store-before (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $temp + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-before (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (local $temp i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + ;; This tee does not affect the value of the right-hand side, so we can + ;; ignore it. + (local.tee $temp + (block (result i32) + ;; This release store is not ordered before the struct.get, so we know + ;; it cannot influence the struct.get's value, so we can optimize. + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (struct.get $struct 0 (local.get $struct)) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + + ;; CHECK: (func $acquire-load-before (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $temp + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-before (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (local $temp i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + ;; This tee does not affect the value of the right-hand side, so we can + ;; ignore it. + (local.tee $temp + (block (result i32) + ;; This acquire load is ordered before the struct.get, so can affect + ;; its value. However, it affects both struct.gets equally, so we can + ;; optimize anyway. (The code used to do a coarser check for ordering + ;; here that would prevent optimization.) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (struct.get $struct 0 (local.get $struct)) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + + ;; CHECK: (func $release-store-middle-lhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-middle-lhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + ;; This is the fallthrough value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the fallthrough, so its effects can + ;; affect the right-hand side in principle. However, the release store + ;; does not affect the value, so we can still optimize. + (block (result i32) + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (i32.const 0) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $acquire-load-middle-lhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-middle-lhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + ;; This is the fallthrough value. + (struct.get $struct 0 (local.get $struct)) + ;; The acquire load could form a synchronization edge that forces the + ;; the right-hand struct.get to observe a different value, so it + ;; blocks optimization. + (block (result i32) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (i32.const 0) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $release-store-middle-rhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-middle-rhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (block (result i32) + ;; This release store is executed between the LHS and RHS struct.gets, + ;; but does not change the value observed by the RHS, so does not block + ;; optimization. + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $acquire-load-middle-rhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-middle-rhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (block (result i32) + ;; This acquire load is executed between the LHS and RHS struct.gets and + ;; can create a synchronization edge forcing the RHS to observe a + ;; different value, so it blocks optimization. + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $release-store-after (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-after (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (br_if $l + ;; This is the RHS value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the RHS value is computed, so its + ;; effects cannot affect the values seen by the cmpxchg and can never + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr "helpfully" only allows + ;; fallthrough via br_if when the value and condition can be + ;; reordered, so unnecessarily blocks this optimization. + (block (result i32) + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $acquire-load-after (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-after (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (br_if $l + ;; This is the RHS value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the RHS value is computed, so its + ;; effects cannot affect the values seen by the cmpxchg and can never + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr "helpfully" only allows + ;; fallthrough via br_if when the value and condition can be + ;; reordered. In this case the value and condition _can_ be reordered, + ;; but the reordering check is currently symmetric, so the + ;; optimization is still blocked. + (block (result i32) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $outer-tee-match (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $y i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.tee $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $outer-tee-match (param $struct (ref $struct)) (result i32) + (local $x i32) + (local $y i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (local.tee $y + (i32.const 0) + ) + ) + ;; We recognize that this comes from the LHS tee and can optimize. + (local.get $x) + ) + ) + + ;; CHECK: (func $inner-tee-match (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $y i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.tee $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $inner-tee-match (param $struct (ref $struct)) (result i32) + (local $x i32) + (local $y i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (local.tee $y + (i32.const 0) + ) + ) + ;; We recognize that this comes from the LHS tee and can optimize. + (local.get $y) + ) + ) + + ;; CHECK: (func $br_if-after-tee (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_if-after-tee (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + (local.tee $x + (i32.const 0) + ) + ;; The br_if condition is evaluated after the local.tee, so it + ;; interferes and blocks optimization. + (block (result i32) + (local.set $x (i32.const 2)) + (i32.const 0) + ) + ) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-interference (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-interference (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + ;; The fallthrough values depends on $x, so the optimization is blocked. + (i32.add (local.get $x) (i32.const 1)) + ) + (i32.add (local.get $x) (i32.const 1)) + ) + ) + + ;; CHECK: (func $tee-set-before (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-before (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (block (result i32) + ;; This set is executed before the tee, so it does not block + ;; optimization. + (local.set $x (i32.const 1)) + (i32.const 0) + ) + ) + (local.get $x) + ) + ) + + ;; CHECK: (func $tee-set-middle-lhs (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-middle-lhs (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (br_if $l + (i32.const 0) + ;; The condition is evaluated after the LHS value, so this would + ;; normally block optimization. However, it is evaluated before the + ;; local.tee, so actually it is fine. + (block (result i32) + (local.set $x (i32.const 1)) + (i32.const 0) + ) + ) + ) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-set-middle-rhs (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-middle-rhs (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (i32.const 1) + ) + (block (result i32) + ;; This set change the value of $x and blocks optimization. + (local.set $x (i32.const 2)) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-set-after (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-after (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (i32.const 1) + ) + (br_if $l + (local.get $x) + ;; The condition is evaluated after the value, so its effects do not + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr once again blocks this + ;; optimization. + (block (result i32) + (local.set $x (i32.const 2)) + (i32.const 0) + ) + ) + ) + ) + ) +) From 70a89892a21348dcaaeaa869d78923586f08bbe7 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 28 May 2026 10:53:47 -0700 Subject: [PATCH 2/3] add NB comment and several TODOs --- src/passes/OptimizeInstructions.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 95c47e7752c..47964f33713 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2795,6 +2795,11 @@ struct OptimizeInstructions // them. The exceptions are `local.tee` instructions and br_if conditions, // which execute after the fallthrough and might affect only the value of // `right`. + // TODO: We should use a custom getFallthrough that ignores whether br_if + // conditions and values can be reordered, since we can handle that more + // precisely here. + // TODO: When the fallthrough is an If (meaning the other branch must never + // return), we should ignore effects in that non-returning branch. EffectAnalyzer interferingEffects(getPassOptions(), *getModule()); bool matchingTeeAndGet = false; while (true) { @@ -2810,6 +2815,8 @@ struct OptimizeInstructions // after it, so we need not look for further effects. But there might be // interfering sets in previous br_if conditions, so we cannot just // return here. + // TODO: Calculate `right`'s fallthrough first in case the fallthrough + // is the matching get. if (areMatchingTeeAndGet(left, right)) { matchingTeeAndGet = true; left = getFallthrough(left); @@ -2821,6 +2828,9 @@ struct OptimizeInstructions } if (auto* br = left->dynCast(); br && br->condition) { assert(br->value); + // NB: We don't need to worry about the branch effect because any branch + // at runtime must skip past the parent expression, so it would not + // matter how that parent expression gets optimized. interferingEffects.walk(br->condition); left = br->value; continue; From 24f50fd5978b2c598cba663f3cc2a84d5b824f21 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 28 May 2026 13:37:43 -0700 Subject: [PATCH 3/3] typos and comment placement --- test/lit/passes/optimize-instructions-struct-rmw.wast | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/lit/passes/optimize-instructions-struct-rmw.wast b/test/lit/passes/optimize-instructions-struct-rmw.wast index 4f196b53b5a..19893460361 100644 --- a/test/lit/passes/optimize-instructions-struct-rmw.wast +++ b/test/lit/passes/optimize-instructions-struct-rmw.wast @@ -1477,9 +1477,9 @@ ) ) +;; Test the effect analysis in areConsecutiveInputsEqual by optimizing (or not) +;; cmpxchg operations with matching expected and replacement operands. (module - ;; Test the effect analysis in areConsecutiveInputsEqual by optimizing (or - ;; not) cmpxchg operations with matching expected and replacement operands. ;; CHECK: (type $struct (shared (struct (field (mut i32))))) (type $struct (shared (struct (field (mut i32))))) ;; CHECK: (type $array (shared (array (mut i32)))) @@ -1979,7 +1979,7 @@ (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 (local.get $struct) (local.tee $x - ;; The fallthrough values depends on $x, so the optimization is blocked. + ;; The fallthrough values depend on $x, so the optimization is blocked. (i32.add (local.get $x) (i32.const 1)) ) (i32.add (local.get $x) (i32.const 1)) @@ -2098,7 +2098,7 @@ (i32.const 1) ) (block (result i32) - ;; This set change the value of $x and blocks optimization. + ;; This set changes the value of $x and blocks optimization. (local.set $x (i32.const 2)) (local.get $x) )