Skip to content

Commit a49d00d

Browse files
Layer normalization lowering to KRNL (#2561)
* Layer normalization lowering to KRNL Signed-off-by: Alexandre Eichenberger <[email protected]> --------- Signed-off-by: Alexandre Eichenberger <[email protected]> Co-authored-by: Tung D. Le <[email protected]>
1 parent ba785f9 commit a49d00d

17 files changed

+479
-24
lines changed

docs/SupportedONNXOps-cpu.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Onnx-mlir currently supports ONNX operations targeting up to opset 19. Limitatio
102102
| **LRN** |6 - * | | |
103103
| **LSTM** |7 - * | | |
104104
| **LabelEncoder** |none | | | |
105-
| **LayerNormalization** |none | | | |
105+
| **LayerNormalization** |17 - * | | |
106106
| **LeakyRelu** |6 - * | | |
107107
| **Less** |7 - * | | |
108108
| **LessOrEqual** |12 - * | | |

src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ void populateONNXToKrnlConversionPattern(RewritePatternSet &patterns,
273273
populateLoweringONNXUniqueOpPattern(patterns, typeConverter, ctx);
274274
// Neural network
275275
populateLoweringONNXConvOpPattern(patterns, typeConverter, ctx, enableParallel, opsForCall);
276-
populateLoweringONNXNormalizationOpPattern(patterns, typeConverter, ctx);
276+
populateLoweringONNXNormalizationOpPattern(patterns, typeConverter, ctx, enableSIMD);
277277
populateLoweringONNXPoolingOpPattern(patterns, typeConverter, ctx);
278278
// Recurrent neural network
279279
populateLoweringONNXGRUOpPattern(patterns, typeConverter, ctx);

src/Conversion/ONNXToKrnl/Math/Elementwise.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1396,7 +1396,7 @@ int64_t canBeVectorized(ShapeHelperType &shapeHelper, MDBuilder &create,
13961396
simdUnroll = 4;
13971397
else
13981398
simdUnroll = 8;
1399-
uVL = create.vec.SuitableUnrollFactor(vms, memRefType,
1399+
uVL = create.vec.computeSuitableUnrollFactor(vms, memRefType,
14001400
shapeHelper.getOutputDims(), collapsedInnermostLoops, simdUnroll,
14011401
/*canPad*/ true, estimatedSimdLoopTripCount);
14021402
LLVM_DEBUG({

src/Conversion/ONNXToKrnl/Math/Reduction.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,8 @@ struct ONNXReductionOpLowering : public OpConversionPattern<ONNXReductionOp> {
544544
}
545545
LLVM_DEBUG(llvm::dbgs()
546546
<< " SIMD: study with init unroll " << unroll << "\n");
547-
VL = create.vec.SuitableUnrollFactor(vms, memRefInType, inputDims,
548-
innermostLoopCollapse, unroll, /*canPad*/ false,
547+
VL = create.vec.computeSuitableUnrollFactor(vms, memRefInType,
548+
inputDims, innermostLoopCollapse, unroll, /*canPad*/ false,
549549
estimatedSimdLoopTripCount);
550550
LLVM_DEBUG(llvm::dbgs() << " SIMD: " << innermostLoopCollapse
551551
<< " loops, VL " << VL << "\n");

src/Conversion/ONNXToKrnl/NN/Normalization.cpp

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,19 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
16+
#include "src/Dialect/Krnl/DialectBuilder.hpp"
17+
#include "src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp"
18+
19+
#define DEBUG_TYPE "lowering-to-krnl"
1620

1721
using namespace mlir;
1822

1923
namespace onnx_mlir {
2024

25+
//===----------------------------------------------------------------------===//
26+
// Batch Norm
27+
//===----------------------------------------------------------------------===//
28+
2129
struct ONNXBatchNormalizationInferenceModeOpLowering
2230
: public OpConversionPattern<ONNXBatchNormalizationInferenceModeOp> {
2331
ONNXBatchNormalizationInferenceModeOpLowering(
@@ -136,6 +144,10 @@ struct ONNXBatchNormalizationInferenceModeOpLowering
136144
}
137145
};
138146

147+
//===----------------------------------------------------------------------===//
148+
// Instance Normalization
149+
//===----------------------------------------------------------------------===//
150+
139151
struct ONNXInstanceNormalizationOpLowering
140152
: public OpConversionPattern<ONNXInstanceNormalizationOp> {
141153
ONNXInstanceNormalizationOpLowering(
@@ -285,11 +297,103 @@ struct ONNXInstanceNormalizationOpLowering
285297
}
286298
};
287299

300+
//===----------------------------------------------------------------------===//
301+
// Layer Normalization
302+
//===----------------------------------------------------------------------===//
303+
304+
using MDBuilder = MultiDialectBuilder<KrnlBuilder, IndexExprBuilderForKrnl,
305+
MemRefBuilder, MathBuilder, VectorBuilder, OnnxBuilder>;
306+
307+
// Generate the original ONNX operations. This is the unoptimized path.
// TODO: conversions of types are not handled.
//
// Decomposes ONNXLayerNormalizationOp into primitive ONNX ops (reduceMean,
// mul, sub, add, sqrt, reciprocal) using the variance identity
//   var(X) = E[X^2] - E[X]^2,
// then replaces lnOp with (Y, mean?, invStdDev?). Optional outputs that the
// op does not define are replaced by a null Value.
//
// rewriter: conversion rewriter used to build the replacement ops.
// loc:      source location attached to all created ops.
// lnOp:     the layer-normalization op being decomposed.
// Returns success() unconditionally; lnOp is replaced in place.
LogicalResult generateONNXLayerNormalizationOpONNXCode(
    ConversionPatternRewriter &rewriter, Location loc,
    ONNXLayerNormalizationOp lnOp) {
  MDBuilder create(rewriter, loc);
  Value X = lnOp.getX(); // Original value, not translated.
  TensorType XType = X.getType().cast<TensorType>();
  Type elementType = XType.getElementType();
  int64_t XRank = XType.getRank();
  // Normalize a possibly-negative axis attribute into [0, XRank).
  int64_t axis = getAxisInRange(lnOp.getAxis(), XRank);
  // Get epsilon as a scalar constant of the input's element type.
  FloatAttr epsilonAttr = lnOp.getEpsilonAttr();
  DenseElementsAttr epsilonDenseAttr =
      onnx_mlir::createDenseElementsAttrFromFloatAttr(
          rewriter, elementType, epsilonAttr);
  Value epsilon = create.onnx.constant(epsilonDenseAttr);

  // Create reduction axes array. Dims [0, axis) are kept as-is; dims
  // [axis, XRank) are reduced (keepDims defaults to true in reduceMean,
  // hence the explicit 1s in reductionShape).
  llvm::SmallVector<int64_t, 4> axesIntArray, reductionShape;
  for (int64_t r = 0; r < axis; ++r)
    reductionShape.emplace_back(XType.getShape()[r]);
  for (int64_t r = axis; r < XRank; ++r) {
    reductionShape.emplace_back(1);
    axesIntArray.emplace_back(r);
  }
  Value axes =
      create.onnx.constant(create.getBuilder().getI64TensorAttr(axesIntArray));
  TensorType reductionType = RankedTensorType::get(reductionShape, elementType);
  // Reduction of input: mean = E[X], var = E[X^2] - (E[X])^2.
  Value meanOfX = create.onnx.reduceMean(reductionType, X, axes);
  Value pow2OfMeanOfX = create.onnx.mul(meanOfX, meanOfX);
  Value XPow2 = create.onnx.mul(X, X);
  Value meanOfXPow2 = create.onnx.reduceMean(reductionType, XPow2, axes);
  Value var = create.onnx.sub(meanOfXPow2, pow2OfMeanOfX);
  // invStdDev = 1 / sqrt(var + epsilon); epsilon guards against div-by-zero.
  Value varWithEpsilon = create.onnx.add(var, epsilon);
  Value stdDev = create.onnx.sqrt(varWithEpsilon);
  Value invStdDev = create.onnx.reciprocal(stdDev);
  // Y = (X - mean) * invStdDev * scale [+ bias].
  Value d = create.onnx.sub(X, meanOfX);
  Value normalized = create.onnx.mul(d, invStdDev);
  Value Y = create.onnx.mul(normalized, lnOp.getScale());
  if (!isNoneValue(lnOp.getB()))
    Y = create.onnx.add(Y, lnOp.getB());
  // Assemble the three results (Y, Mean, InvStdDev); unused optional results
  // are mapped to a null Value. NOTE(review): assumes replaceOp accepts null
  // replacement values for unused results — confirm against the MLIR version
  // this builds with.
  llvm::SmallVector<Value, 3> outputs;
  outputs.emplace_back(Y);
  Value noneValue;
  if (isNoneValue(lnOp.getMean()))
    outputs.emplace_back(noneValue);
  else
    outputs.emplace_back(meanOfX);
  if (isNoneValue(lnOp.getInvStdDev()))
    outputs.emplace_back(noneValue);
  else
    outputs.emplace_back(invStdDev);
  rewriter.replaceOp(lnOp, outputs);
  return success();
}
364+
365+
// Conversion pattern that lowers ONNXLayerNormalizationOp. Currently it only
// emits the generic ONNX decomposition (see
// generateONNXLayerNormalizationOpONNXCode); no direct Krnl/SIMD code is
// generated yet.
struct ONNXLayerNormalizationOpLowering
    : public OpConversionPattern<ONNXLayerNormalizationOp> {
  ONNXLayerNormalizationOpLowering(
      TypeConverter &typeConverter, MLIRContext *ctx, bool enableSIMD)
      : OpConversionPattern(typeConverter, ctx), enableSIMD(enableSIMD) {}

  // Not read anywhere in this pattern yet; NOTE(review): presumably reserved
  // for a future SIMD lowering path — confirm before removing.
  bool enableSIMD;

  LogicalResult matchAndRewrite(ONNXLayerNormalizationOp lnOp,
      ONNXLayerNormalizationOpAdaptor adaptor,
      ConversionPatternRewriter &rewriter) const final {
    // Get generic info.
    Operation *op = lnOp.getOperation();
    ValueRange operands = adaptor.getOperands();
    Location loc = ONNXLoc<ONNXLayerNormalizationOp>(op);
    // Create builder and shape helper. The shape computation is performed for
    // its assertion side effect; its results are not otherwise used here.
    MDBuilder create(rewriter, loc);
    ONNXLayerNormalizationOpShapeHelper shapeHelper(
        op, operands, &create.krnlIE);
    shapeHelper.computeShapeAndAssertOnFailure();

    // Fall back to the unoptimized ONNX-level decomposition.
    return generateONNXLayerNormalizationOpONNXCode(rewriter, loc, lnOp);
  }
};
389+
288390
void populateLoweringONNXNormalizationOpPattern(RewritePatternSet &patterns,
289-
TypeConverter &typeConverter, MLIRContext *ctx) {
391+
TypeConverter &typeConverter, MLIRContext *ctx, bool enableSIMD) {
290392
patterns.insert<ONNXBatchNormalizationInferenceModeOpLowering>(
291393
typeConverter, ctx);
292394
patterns.insert<ONNXInstanceNormalizationOpLowering>(typeConverter, ctx);
395+
patterns.insert<ONNXLayerNormalizationOpLowering>(
396+
typeConverter, ctx, enableSIMD);
293397
}
294398

295399
} // namespace onnx_mlir

src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,11 @@ void populateLoweringONNXCategoryMapperOpPattern(
341341
void populateLoweringONNXConvOpPattern(mlir::RewritePatternSet &,
342342
mlir::TypeConverter &, mlir::MLIRContext *, bool enableParallel,
343343
std::string opsForCall);
344-
void populateLoweringONNXNormalizationOpPattern(
345-
mlir::RewritePatternSet &, mlir::TypeConverter &, mlir::MLIRContext *);
344+
mlir::LogicalResult generateONNXLayerNormalizationOpONNXCode(
345+
mlir::ConversionPatternRewriter &rewriter, mlir::Location loc,
346+
mlir::ONNXLayerNormalizationOp lnOp);
347+
void populateLoweringONNXNormalizationOpPattern(mlir::RewritePatternSet &,
348+
mlir::TypeConverter &, mlir::MLIRContext *, bool enableSIMD);
346349
void populateLoweringONNXPoolingOpPattern(
347350
mlir::RewritePatternSet &, mlir::TypeConverter &, mlir::MLIRContext *);
348351

src/Dialect/Mlir/DialectBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1633,7 +1633,7 @@ void VectorBuilder::multiReduction(SmallVectorImpl<Value> &inputVecArray,
16331633
}
16341634
}
16351635

1636-
int64_t VectorBuilder::SuitableUnrollFactor(VectorMachineSupport *vms,
1636+
int64_t VectorBuilder::computeSuitableUnrollFactor(VectorMachineSupport *vms,
16371637
MemRefType memRefType, llvm::SmallVectorImpl<IndexExpr> &memRefDims,
16381638
int64_t collapsedInnermostLoops, int64_t maxSimdUnroll, bool canPad,
16391639
int64_t &estimatedSimdLoopTripCount) const {

src/Dialect/Mlir/DialectBuilder.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ struct VectorBuilder final : DialectBuilder {
440440
// estimatedSimdLoopTripCount: provide an estimation of the SIMD loop trip
441441
// count. If runtime, return -1; if cannot simdize, return 0; if compile time
442442
// (or a multiple of a compile time value): return that literal.
443-
int64_t SuitableUnrollFactor(VectorMachineSupport *vms,
443+
int64_t computeSuitableUnrollFactor(VectorMachineSupport *vms,
444444
mlir::MemRefType memRefType, llvm::SmallVectorImpl<IndexExpr> &memRefDims,
445445
int64_t collapsedInnermostLoops, int64_t maxSimdUnroll, bool canPad,
446446
int64_t &estimatedSimdLoopTripCount) const;

src/Dialect/ONNX/DialectBuilder.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ Value OnnxBuilder::reduceMax(Type outputType, Value data, Value axes,
196196
toTensor(data), toTensor(axes), i_keepDims, i_noop_with_empty_axes);
197197
}
198198

199+
// Build an ONNXReduceMeanOp over `data` along `axes`, producing `outputType`.
// keepDims / noop_with_empty_axes are forwarded as the 0/1 integer attributes
// the op builder expects.
Value OnnxBuilder::reduceMean(Type outputType, Value data, Value axes,
    bool keepDims, bool noop_with_empty_axes) const {
  return createTypedOpAndInferShapes<ONNXReduceMeanOp>(toTensor(outputType),
      toTensor(data), toTensor(axes), static_cast<int64_t>(keepDims),
      static_cast<int64_t>(noop_with_empty_axes));
}
206+
199207
Value OnnxBuilder::reduceMin(Type outputType, Value data, Value axes,
200208
bool keepDims, bool noop_with_empty_axes) const {
201209
int64_t i_keepDims = keepDims; // 0 if false, 1 if true
@@ -212,6 +220,12 @@ Value OnnxBuilder::reduceSum(Type outputType, Value data, Value axes,
212220
toTensor(data), toTensor(axes), i_keepDims, i_noop_with_empty_axes);
213221
}
214222

223+
// Build an ONNXReciprocalOp. Reciprocal is elementwise, so the result type is
// the input type.
Value OnnxBuilder::reciprocal(Value input) const {
  return createTypedOpAndInferShapes<ONNXReciprocalOp>(
      toTensor(input.getType()), toTensor(input));
}
228+
215229
Value OnnxBuilder::reshape(Type outputType, Value input, Value shape) const {
216230
return createTypedOpAndInferShapes<ONNXReshapeOp>(
217231
toTensor(outputType), toTensor(input), toTensor(shape));
@@ -261,6 +275,10 @@ Value OnnxBuilder::slice(Type outputType, Value input, int64_t start,
261275
return slice(outputType, input, startVal, endVal, /*axis*/ zeroVal, stepVal);
262276
}
263277

278+
// Build an ONNXSqrtOp on `input`; the output type is inferred by shape
// inference rather than passed explicitly.
Value OnnxBuilder::sqrt(Value input) const {
  return createOpAndInferShapes<ONNXSqrtOp>(toTensor(input));
}
281+
264282
ValueRange OnnxBuilder::split(
265283
TypeRange outputTypes, Value input, Value split, int64_t axis) const {
266284
IntegerAttr axisAttr =

src/Dialect/ONNX/DialectBuilder.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ struct OnnxBuilder : DialectBuilder {
103103
mlir::Value axes, bool keepDims = true,
104104
bool noop_with_empty_axes = false) const;
105105

106+
// ONNXReduceMeanOp
107+
mlir::Value reduceMean(mlir::Type outputType, mlir::Value data,
108+
mlir::Value axes, bool keepDims = true,
109+
bool noop_with_empty_axes = false) const;
110+
106111
// ONNXReduceMinOp
107112
mlir::Value reduceMin(mlir::Type outputType, mlir::Value data,
108113
mlir::Value axes, bool keepDims = true,
@@ -123,6 +128,9 @@ struct OnnxBuilder : DialectBuilder {
123128
mlir::Value reshapeToNDim(
124129
mlir::Value val, int64_t N, bool collapseMostSignificant) const;
125130

131+
// ONNXReciprocalOp
132+
mlir::Value reciprocal(mlir::Value input) const;
133+
126134
// ONNXReverseSequenceOp
127135
mlir::Value reverseSequence(mlir::Type outputType, mlir::Value input,
128136
mlir::Value sequenceLens, int64_t batchAxis, int64_t timeAxis) const;
@@ -140,6 +148,9 @@ struct OnnxBuilder : DialectBuilder {
140148
mlir::Value slice(mlir::Type outputType, mlir::Value input, int64_t start,
141149
int64_t end, int64_t step = 1) const; // 1D slice
142150

151+
// ONNXSqrtOp
152+
mlir::Value sqrt(mlir::Value input) const;
153+
143154
// ONNXSplitOp
144155
mlir::ValueRange split(mlir::TypeRange outputTypes, mlir::Value input,
145156
mlir::Value split, int64_t axis) const;

0 commit comments

Comments
 (0)