WebAssembly · brendandahl · May 12, 2026 · May 14, 2026 · May 14, 2026 · tlively
@@ -115,6 +115,7 @@ set(passes_SOURCES
   RemoveUnusedModuleElements.cpp
   RemoveUnusedTypes.cpp
   ReorderFunctions.cpp
+  ReorderFunctionsBySimilarity.cpp
   ReorderGlobals.cpp
   ReorderLocals.cpp
   ReorderTypes.cpp

@@ -0,0 +1,157 @@
+/*
+ * Copyright 2026 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
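+//
+// Sorts defined functions so that structurally similar ones end up adjacent.
+// The sort key is the function's type, then the types of its vars, then a
+// capped sequence describing its instructions. Placing mutually-compressible
+// instruction sequences next to each other is a heuristic intended to improve
+// the ratio of a subsequent compression step (e.g., gzip or Brotli).
+//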
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "ir/module-utils.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "wasm.h"
+
+namespace wasm {
+
+// Walks a function body in post-order and records a flat sequence of numbers
+// describing it. Every visited expression contributes its expression ID, and
+// selected expression kinds additionally contribute a few immediates (operator,
+// access width, offset, or value type). The sequence is only used as a
+// comparison key, so it needs to be deterministic but not reversible.
+struct OpcodeSequenceBuilder
+  : public PostWalker<OpcodeSequenceBuilder,
+                      UnifiedExpressionVisitor<OpcodeSequenceBuilder>> {
+  // The collected key, in visit order.
+  std::vector<uint32_t> sequence;
+
+  // Soft cap on the key length. Once the sequence has reached this size no
+  // further expressions are recorded. The check happens before an expression
+  // is appended, so the final size can exceed the cap by the immediates of the
+  // last recorded expression (at most two entries).
+  static constexpr size_t MaxLen = 512;
+
+  // Marker recorded for every type that is not a basic type. It is far above
+  // the handful of basic type IDs, so it cannot be confused with one of them.
+  static constexpr uint32_t NonBasicTypeKey = ~uint32_t(0);
+
+  // Returns a number for `type` that is safe to use in a sort key.
+  //
+  // Basic types keep their ID, exactly as before. For any other type the raw
+  // ID is not used: NOTE(review): in Binaryen's type system the ID of a
+  // non-basic type appears to be derived from the address of interned type
+  // data (confirm against wasm-type.h), which would make the resulting
+  // function order differ from run to run. All such types therefore share one
+  // fixed marker. This loses the distinction between different reference
+  // types but keeps the output reproducible.
+  // TODO: Use a richer deterministic encoding for non-basic types if it turns
+  //       out to matter for compression.
+  static uint32_t typeKey(Type type) {
+    if (type.isBasic()) {
+      return static_cast<uint32_t>(type.getID());
+    }
+    return NonBasicTypeKey;
+  }
+
+  void visitExpression(Expression* curr) {
+    if (sequence.size() >= MaxLen) {
+      return;
+    }
+    // Append the core expression ID
+    sequence.push_back(curr->_id);
+
+    // Capture important immediate type/operator information
+    // TODO: There's probably more data that would be useful to capture.
+    if (auto* unary = curr->dynCast<Unary>()) {
+      sequence.push_back(unary->op);
+    } else if (auto* binary = curr->dynCast<Binary>()) {
+      sequence.push_back(binary->op);
+    } else if (auto* load = curr->dynCast<Load>()) {
+      sequence.push_back(load->bytes);
+      // Only the low 32 bits of the offset take part in the key; the
+      // truncation is intentional since this is a similarity heuristic.
+      sequence.push_back(static_cast<uint32_t>(load->offset));
+    } else if (auto* store = curr->dynCast<Store>()) {
+      sequence.push_back(store->bytes);
+      // See the note on load offsets above.
+      sequence.push_back(static_cast<uint32_t>(store->offset));
+    } else if (auto* localGet = curr->dynCast<LocalGet>()) {
+      sequence.push_back(typeKey(localGet->type));
+    } else if (auto* localSet = curr->dynCast<LocalSet>()) {
+      sequence.push_back(typeKey(localSet->type));
+    } else if (auto* const_ = curr->dynCast<Const>()) {
+      sequence.push_back(typeKey(const_->type));
+    }
+  }
+};
+
+// Pass that reorders the functions of a module. Imported functions are kept at
+// the front in their original relative order. Defined functions follow, sorted
+// by (printed function type, printed var types, opcode sequence), with the
+// original position as the final tie-breaker.
+struct ReorderFunctionsBySimilarity : public Pass {
+  // Reports that no non-nullable local fixups are needed after this pass.
+  bool requiresNonNullableLocalFixups() override { return false; }
+
+  void run(Module* module) override {
+    // The per-function features that drive the ordering.
+    struct Features {
+      std::string typeStr;
+      std::vector<std::string> varsStrs;
+      std::vector<uint32_t> opcodeSequence;
+    };
+
+    // Compute the features of every defined function. Imported functions are
+    // skipped and keep default (empty) features.
+    ModuleUtils::ParallelFunctionAnalysis<Features> perFunction(
+      *module, [&](Function* func, Features& out) {
+        if (func->imported()) {
+          return;
+        }
+        out.typeStr = func->type.toString();
+        out.varsStrs.reserve(func->vars.size());
+        for (const auto& var : func->vars) {
+          out.varsStrs.push_back(var.toString());
+        }
+        OpcodeSequenceBuilder opcodes;
+        opcodes.walk(func->body);
+        out.opcodeSequence = std::move(opcodes.sequence);
+      });
+
+    // A defined function together with everything needed to order it.
+    struct Entry {
+      std::unique_ptr<Function> func;
+      Features features;
+      // Position among the defined functions before sorting.
+      size_t position;
+    };
+
+    // Strict total order over entries: compare the features field by field and
+    // fall back to the original position, which is unique per entry.
+    auto comesBefore = [](const Entry& lhs, const Entry& rhs) {
+      const Features& a = lhs.features;
+      const Features& b = rhs.features;
+      if (a.typeStr != b.typeStr) {
+        return a.typeStr < b.typeStr;
+      }
+      if (a.varsStrs != b.varsStrs) {
+        return a.varsStrs < b.varsStrs;
+      }
+      if (a.opcodeSequence != b.opcodeSequence) {
+        return a.opcodeSequence < b.opcodeSequence;
+      }
+      return lhs.position < rhs.position;
+    };
+
+    // Take ownership of every function, splitting imports from definitions.
+    std::vector<std::unique_ptr<Function>> imports;
+    std::vector<Entry> defined;
+    defined.reserve(module->functions.size());
+
+    for (auto& slot : module->functions) {
+      if (slot->imported()) {
+        imports.push_back(std::move(slot));
+        continue;
+      }
+      Entry entry;
+      // Look the features up before the owning pointer is moved away.
+      entry.features = std::move(perFunction.map[slot.get()]);
+      entry.position = defined.size();
+      entry.func = std::move(slot);
+      defined.push_back(std::move(entry));
+    }
+
+    // Order the defined functions by the similarity heuristic.
+    std::sort(defined.begin(), defined.end(), comesBefore);
+
+    // Rebuild the function list: imports first, then the sorted definitions.
+    auto& functions = module->functions;
+    functions.clear();
+    functions.reserve(imports.size() + defined.size());
+
+    for (auto& func : imports) {
+      functions.push_back(std::move(func));
+    }
+    for (auto& entry : defined) {
+      functions.push_back(std::move(entry.func));
+    }
+  }
+};
+
+// Factory for the pass. Returns a newly allocated ReorderFunctionsBySimilarity;
+// this is the function registered under "reorder-functions-by-similarity".
+Pass* createReorderFunctionsBySimilarityPass() {
+  return new ReorderFunctionsBySimilarity{};
+}
+
+} // namespace wasm
@@ -442,6 +442,9 @@ void PassRegistry::registerPasses() {
   registerPass("reorder-functions-by-name",
                "sorts functions by name (useful for debugging)",
                createReorderFunctionsByNamePass);
+  registerPass("reorder-functions-by-similarity",
+               "sorts functions by similarity to improve compression",
+               createReorderFunctionsBySimilarityPass);
   registerPass("reorder-functions",
                "sorts functions by access frequency",
                createReorderFunctionsPass);

@@ -144,6 +144,7 @@ Pass* createRemoveUnusedNonFunctionModuleElementsPass();
 Pass* createRemoveUnusedNamesPass();
 Pass* createRemoveUnusedTypesPass();
 Pass* createReorderFunctionsByNamePass();
+Pass* createReorderFunctionsBySimilarityPass();
 Pass* createReorderFunctionsPass();
 Pass* createReorderGlobalsPass();
 Pass* createReorderGlobalsAlwaysPass();

@@ -418,6 +418,9 @@
 ;; CHECK-NEXT:   --reorder-functions-by-name                   sorts functions by name (useful
 ;; CHECK-NEXT:                                                 for debugging)
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --reorder-functions-by-similarity             sorts functions by similarity to
+;; CHECK-NEXT:                                                 improve compression
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --reorder-globals                             sorts globals by access
 ;; CHECK-NEXT:                                                 frequency
 ;; CHECK-NEXT:

@@ -454,6 +454,9 @@
 ;; CHECK-NEXT:   --reorder-functions-by-name                   sorts functions by name (useful
 ;; CHECK-NEXT:                                                 for debugging)
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --reorder-functions-by-similarity             sorts functions by similarity to
+;; CHECK-NEXT:                                                 improve compression
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --reorder-globals                             sorts globals by access
 ;; CHECK-NEXT:                                                 frequency
 ;; CHECK-NEXT:

@@ -382,6 +382,9 @@
 ;; CHECK-NEXT:   --reorder-functions-by-name                   sorts functions by name (useful
 ;; CHECK-NEXT:                                                 for debugging)
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --reorder-functions-by-similarity             sorts functions by similarity to
+;; CHECK-NEXT:                                                 improve compression
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --reorder-globals                             sorts globals by access
 ;; CHECK-NEXT:                                                 frequency
 ;; CHECK-NEXT:

@@ -0,0 +1,87 @@
+;; Checks that --reorder-functions-by-similarity groups defined functions by signature, then by local types, then by body shape, keeping the original relative order among functions that tie.
+;; RUN: foreach %s %t wasm-opt -all --reorder-functions-by-similarity -S -o - | filecheck %s
+
+(module
+  ;; CHECK:      (type $0 (func (result i32)))
+  ;; CHECK-NEXT: (type $1 (func (param i32) (result i32)))
+
+  ;; CHECK:      (func $sig_b (type $1) (param $0 i32) (result i32)
+  ;; CHECK-NEXT:  (i32.const 100)
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $sig_c (type $1) (param $0 i32) (result i32)
+  ;; CHECK-NEXT:  (i32.const 200)
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $body_add_2 (type $0) (result i32)
+  ;; CHECK-NEXT:  (i32.add
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:   (i32.const 20)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $body_add_1 (type $0) (result i32)
+  ;; CHECK-NEXT:  (i32.add
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:   (i32.const 2)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $body_sub (type $0) (result i32)
+  ;; CHECK-NEXT:  (i32.sub
+  ;; CHECK-NEXT:   (i32.const 1)
+  ;; CHECK-NEXT:   (i32.const 2)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $locals_a (type $0) (result i32)
+  ;; CHECK-NEXT:  (local $0 i32)
+  ;; CHECK-NEXT:  (local $1 f64)
+  ;; CHECK-NEXT:  (i32.const 5)
+  ;; CHECK-NEXT: )
+
+  ;; CHECK:      (func $locals_b (type $0) (result i32)
+  ;; CHECK-NEXT:  (local $0 i32)
+  ;; CHECK-NEXT:  (local $1 f64)
+  ;; CHECK-NEXT:  (i32.const 10)
+  ;; CHECK-NEXT: )
+
+  ;; Functions in mixed order:
+
+  ;; Signature A
+  (func $body_sub (result i32)
+    (i32.sub (i32.const 1) (i32.const 2))
+  )
+
+  ;; Signature B: (param i32) (result i32)
+  (func $sig_b (param i32) (result i32)
+    (i32.const 100)
+  )
+
+  ;; Signature A, same body shape as $body_add_1
+  (func $body_add_2 (result i32)
+    (i32.add (i32.const 10) (i32.const 20))
+  )
+
+  ;; Signature A, has local variables (i32 f64)
+  (func $locals_a (result i32)
+    (local i32 f64)
+    (i32.const 5)
+  )
+
+  ;; Signature A, same body shape as $body_add_2
+  (func $body_add_1 (result i32)
+    (i32.add (i32.const 1) (i32.const 2))
+  )
+
+  ;; Signature A, has local variables (i32 f64), same as $locals_a
+  (func $locals_b (result i32)
+    (local i32 f64)
+    (i32.const 10)
+  )
+
+  ;; Signature B: (param i32) (result i32), same as $sig_b
+  (func $sig_c (param i32) (result i32)
+    (i32.const 200)
+  )
+)