
Commit a1c0d1e

Author: Matthew Francis-Landau (committed)

fix issue with pushing reshape when broadcasting

1 parent 35bc5fd · commit a1c0d1e

File tree: 2 files changed (43 additions, 6 deletions)


mlir-tensorrt/tensorrt/lib/TensorRT/Transforms/TransposeReshapeElimination.cpp

Lines changed: 25 additions & 6 deletions
@@ -1423,7 +1423,7 @@ class PushReshapeUpThroughEinsum
 
     LLVM_DEBUG({
       std::stringstream out;
-      out << "==== Einsum Reshape/Transpose Pushdown Debug ====\n";
+      out << "==== Einsum Reshape/Transpose Pushup Debug ====\n";
       for (const auto &entry : charToGroup) {
         out << " charToGroup[" << entry.first << "] = " << entry.second
             << "\n";
@@ -1492,8 +1492,16 @@ class PushReshapeUpThroughEinsum
       for (char c : group->second)
         newInputTranspose.push_back(equation.lhsParts[i].find(c));
       newInputEquation += inputToReshapedMap[group->second].newAxes;
-      for (int64_t v : inputToReshapedMap[group->second].newShape)
-        newInputShape.push_back(v);
+      for (int64_t v : inputToReshapedMap[group->second].newShape) {
+        if (v != 1 && group->second.size() == 1 &&
+            inputType.getDimSize(j) == 1) {
+          // if the group is of size 1, then it can have different sizes for
+          // each input due to broadcasting
+          newInputShape.push_back(1);
+        } else {
+          newInputShape.push_back(v);
+        }
+      }
     }
   }
 
@@ -1516,6 +1524,13 @@ class PushReshapeUpThroughEinsum
           out << ", ";
       }
       out << "]\n";
+      out << " oldShape: [";
+      for (size_t si = 0; si < inputType.getShape().size(); ++si) {
+        out << inputType.getShape()[si];
+        if (si + 1 < inputType.getShape().size())
+          out << ", ";
+      }
+      out << "]\n";
       DBGS() << out.str() << "\n";
     });
 
@@ -1529,11 +1544,13 @@ class PushReshapeUpThroughEinsum
       newInputs.push_back(newReshape);
       newEquation.lhsParts.push_back(newInputEquation);
     }
-    LLVM_DEBUG(
-        { DBGS() << "===============================================\n"; });
-
     std::string newEquationStr = newEquation.generateEquation();
 
+    LLVM_DEBUG({
+      DBGS() << newEquationStr << "\n"
+             << "===============================================\n";
+    });
+
     auto newEinsum = rewriter.create<tensorrt::EinsumOp>(
         einsum.getLoc(), op.getType(), newInputs, newEquationStr);
     assert(newEquation.rhs.size() == newEinsum.getType().getShape().size());
@@ -3056,7 +3073,9 @@ class TransposeReshapeEliminationPass
         PushUpReshapeUnary<UnaryOp>, PushUpReshapeUnary<ActivationOp>,
         PushUpOpQuantizeDequantize<tensorrt::TransposeOp>,
         PushUpOpQuantizeDequantize<tensorrt::ReshapeOp>,
+
        PushReshapeUpThroughEinsum, PushUpReshapeElementwise,
+
         PushUpTransposeSoftmax, PushUpReshapeSoftmax,
         SimpleTransposeToReshape>(ctx, PatternBenefit(2));
     patterns.insert<EinsumPushUpTranspose>(ctx, PatternBenefit(1));
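
Note: the behavioral change in this file is the guard added around newInputShape.push_back(v) (new lines 1495-1504); the remaining hunks only adjust debug output. A minimal standalone sketch of the same rule follows; the helper name pushedUpDimSize and its parameters are invented here purely for illustration and are not part of the pass.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Illustration only: choose the extent that a reshape pushed above an einsum
// should use for one dimension of one operand.
//   groupSize      - number of einsum labels folded into this reshape group
//   reshapedExtent - the extent the group has after the reshape
//   operandExtent  - the extent this particular operand has for the dimension
static int64_t pushedUpDimSize(std::size_t groupSize, int64_t reshapedExtent,
                               int64_t operandExtent) {
  // A group of size 1 can have a different extent for each operand because of
  // broadcasting; if this operand's extent is 1, the pushed-up reshape must
  // keep it at 1 rather than taking the reshaped extent from another operand.
  if (reshapedExtent != 1 && groupSize == 1 && operandExtent == 1)
    return 1;
  return reshapedExtent;
}

int main() {
  // Extents from the test added below: "bdc,bdc->bd" with operands
  // 6x64x448 and 6x1x448; label d is 64 for one operand and broadcast (1)
  // for the other.
  assert(pushedUpDimSize(1, 64, 64) == 64); // non-broadcast operand
  assert(pushedUpDimSize(1, 64, 1) == 1);   // broadcast operand keeps 1
  return 0;
}

This mirrors the condition v != 1 && group->second.size() == 1 && inputType.getDimSize(j) == 1 in the diff above.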

mlir-tensorrt/tensorrt/test/Dialect/TensorRT/transpose-reshape-elimination.mlir

Lines changed: 18 additions & 0 deletions
@@ -347,4 +347,22 @@ func.func @can_not_push_reshape_through_einsum(%arg0: tensor<2x20x12x64xf32>, %a
   %0 = tensorrt.einsum {equation = "acbd,abcd->abd"} ins(%arg0, %arg1 : tensor<2x20x12x64xf32>, tensor<2x12x20x1xf32>) -> tensor<2x12x64xf32>
   %1 = tensorrt.reshape %0 : tensor<2x12x64xf32> to tensor<2x1x768xf32>
   return %1 : tensor<2x1x768xf32>
+}
+
+// -----
+
+// CHECK: @push_reshape_broadcast(%[[arg0:.+]]: tensor<6x64x448xf32>, %[[arg1:.+]]: tensor<6x1x448xf32>)
+// CHECK: %[[const:.+]] = tensorrt.constant dense_resource<__elided__> : tensor<1x1x384x384xf32>
+// CHECK-DAG: %[[v0:.+]] = tensorrt.expand_rank %[[arg0]] : tensor<6x64x448xf32> to tensor<1x1x6x64x448xf32>
+// CHECK-DAG: %[[v1:.+]] = tensorrt.expand_rank %[[arg1]] : tensor<6x1x448xf32> to tensor<1x1x6x1x448xf32>
+// CHECK: %[[v2:.+]] = tensorrt.matrix_multiply {{{.*}}} ins(%[[v0]], %[[v1]] : {{.*}})
+// CHECK: %[[v3:.+]] = tensorrt.reshape %[[v2]] : tensor<1x1x6x64xf32> to tensor<1x1x384xf32>
+// CHECK: %[[v4:.+]] = tensorrt.matrix_multiply {{{.*}}} ins(%[[v3]], %[[const]] : {{.*}}) -> tensor<1x1x384xf32>
+// CHECK: return %[[v4]]
+func.func @push_reshape_broadcast(%arg0: tensor<6x64x448xf32>, %arg1: tensor<6x1x448xf32>) -> tensor<1x1x384xf32> {
+  %const = tensorrt.constant dense_resource<__elided__> : tensor<384x6x64xf32>
+  %1 = tensorrt.einsum {equation = "bdc,bdc->bd"} ins(%arg0, %arg1 : tensor<6x64x448xf32>, tensor<6x1x448xf32>) -> tensor<6x64xf32>
+  %2 = tensorrt.einsum {equation = "bd,ebd->e"} ins(%1, %const : tensor<6x64xf32>, tensor<384x6x64xf32>) -> tensor<384xf32>
+  %3 = tensorrt.reshape %2 : tensor<384xf32> to tensor<1x1x384xf32>
+  return %3 : tensor<1x1x384xf32>
 }
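
Note: read together with the fix above, the new @push_reshape_broadcast case exercises the broadcast situation directly: in "bdc,bdc->bd", %arg0 carries d = 64 while %arg1 carries d = 1, so the trailing reshape tensor<384xf32> to tensor<1x1x384xf32> can only be pushed above the einsums if the broadcast operand keeps extent 1 for that axis. The CHECK lines reflect this: %arg1 is expanded to tensor<1x1x6x1x448xf32>, keeping the broadcast extent at 1 instead of inheriting 64 from %arg0.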
