From 0183ad9bf95a90144c8dad139d16718a8421c845 Mon Sep 17 00:00:00 2001 From: "Tung D. Le" Date: Fri, 10 Jan 2025 12:59:26 +0900 Subject: [PATCH] Fix some bugs for ReduceMin/Max (#3038) Signed-off-by: Tung D. Le --- .../ONNXToZHigh/ONNXLegalityCheck.cpp | 84 +++++++++---------- .../Conversion/ONNXToZHigh/ONNXToZHigh.td | 4 +- .../Conversion/ZHighToZLow/ZHighToZLow.cpp | 21 ++--- .../NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp | 33 +++----- .../ZLowToLLVM/ZLowToLLVMCommon.cpp | 3 +- .../ZLowToLLVM/ZLowToLLVMCommon.hpp | 6 +- src/Accelerators/NNPA/Dialect/ZHigh/ZHigh.td | 14 ++-- .../ZHigh/ZHighOps/Reduction/Reduction.cpp | 11 +-- src/Accelerators/NNPA/Dialect/ZLow/ZLow.td | 6 +- test/accelerators/NNPA/backend/CMakeLists.txt | 25 +----- .../conversion/onnx-to-zhigh/reducemax.mlir | 76 +++++++++++++++-- .../conversion/onnx-to-zhigh/reducemin.mlir | 82 +++++++++++++++--- .../conversion/zhigh-to-zlow/reducemax.mlir | 19 +++-- .../conversion/zhigh-to-zlow/reducemin.mlir | 21 +++-- 14 files changed, 250 insertions(+), 155 deletions(-) diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp index aa161a9f9e..76fa3fa547 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp @@ -953,49 +953,29 @@ bool isSuitableForZDNN( // Common function for ReduceMax and ReduceMin template static bool checkReduceParam(OP_TYPE op) { - IndexExprBuilderForAnalysis createIE(op.getLoc()); + OpBuilder b(op); + Location loc = op.getLoc(); + IndexExprBuilderForAnalysis createIE(loc); + IndexExprScope ieScope(&b, loc); + + Value data = op.getData(); + Value axesVal = op.getAxes(); + int64_t keepdims = op.getKeepdims(); + int64_t noop_with_empty_axes = op.getNoopWithEmptyAxes(); // Check NNPA level. 
if (!isCompatibleWithNNPALevel(NNPALevel::M15)) return onnxToZHighInCompatibilityReport(op.getOperation(), NNPALevel::M15); // Check data type. - Value data = op.getData(); + int64_t rank = getRank(data.getType()); if (!isValidElementTypeAndRank(op.getOperation(), data)) return false; - // Check axes value - Value axesVal = op.getAxes(); - if (!isDenseONNXConstant(axesVal)) - return false; - - ONNXConstantOp axesConstant = - mlir::cast(axesVal.getDefiningOp()); - int64_t axesInt = getScalarValue(axesConstant); - - int64_t keepdims = op.getKeepdims(); - int64_t noop_with_empty_axes = op.getNoopWithEmptyAxes(); - int64_t rank = createIE.getShapedTypeRank(data); - - // Check if axes (int64) is exactly a size of one - if (floor(log10(axesInt)) + 1 == 1) { - int64_t axis = axesInt; - // Accepted range is [-r, r-1] where r = rank(data) - if (axis < -rank || axis > rank - 1) { - std::string message = - "The `axis` is out of the accepted range which is [-r, r-1]"; - return onnxToZHighUnsupportedReport(op, message); - } - if ((axis != -1) && (axis != rank - 1)) { - std::string message = "The `axis` must be the innermost dimension. "; - return onnxToZHighUnsupportedReport(op, message); - } - } else { - std::string message = "Axes can only be a scalar size of one. "; - return onnxToZHighUnsupportedReport(op, message); - } - - // REMINDER: Should we check the input tensor rank. + // NNPA does not support reduction over all axes. + if (isNoneValue(axesVal)) + return onnxToZHighUnsupportedReport( + op.getOperation(), "Does not support reduction over all axes."); // Check keepdims and noop_with_empty_axes, we only support the default // value. 
Attributes: keepdims (default is 1) and noop_with_empty_axes @@ -1007,6 +987,28 @@ static bool checkReduceParam(OP_TYPE op) { std::to_string(keepdims) + ") must be 1."; return onnxToZHighUnsupportedReport(op, message); } + + // Check axes value + DimsExpr axesIE; + createIE.getIntFromArrayAsDims(axesVal, axesIE); + if (axesIE.size() != 1) + return onnxToZHighUnsupportedReport( + op.getOperation(), "Does not support multiple reduction axes."); + if (!axesIE[0].isLiteral()) + return onnxToZHighUnsupportedReport( + op.getOperation(), "Reduction axis is unknown at compile time."); + int64_t axis = axesIE[0].getLiteral(); + // Accepted range is [-r, r-1] where r = rank(data) + if (axis < -rank || axis > rank - 1) { + std::string message = + "Reduction axis is out of the accepted range which is [-r, r-1]"; + return onnxToZHighUnsupportedReport(op, message); + } + if ((axis != -1) && (axis != rank - 1)) { + std::string message = "Reduction axis must be the innermost dimension. "; + return onnxToZHighUnsupportedReport(op, message); + } + return true; } @@ -1014,26 +1016,16 @@ static bool checkReduceParam(OP_TYPE op) { template <> bool isSuitableForZDNN( ONNXReduceMaxOp op, const DimAnalysis *dimAnalysis) { - // Check parameter restrictions for ReduceMax - bool isReduceMax = checkReduceParam(op); - if (!isReduceMax) - return false; - - return true; + return checkReduceParam(op); } /// Check legality for ONNXReduceMin. template <> bool isSuitableForZDNN( ONNXReduceMinOp op, const DimAnalysis *dimAnalysis) { - // Check parameter restrictions for ReduceMin - bool isReduceMin = checkReduceParam(op); - if (!isReduceMin) - return false; - - return true; + return checkReduceParam(op); } /// Check legality for ONNXReduceMeanV13. 
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.td b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.td index d09d271432..7e6f724c57 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.td +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.td @@ -471,7 +471,7 @@ def replaceONNXReduceMeanV13Pattern : Pat< def replaceONNXReduceMaxPattern : Pat< (ONNXReduceMaxOp:$res $data, $axes, $keepdims, $noop_with_empty_axes), (ZHighUnstickOp (ZHighReduceMaxOp (ZHighStickOp:$s_x $data, (NoneLayoutAttr), - (GetDefaultSaturation)), (GetStringAttr<"REDUCE_OP_MAXIMUM">))), + (GetDefaultSaturation)))), [(IsCompatibleWithNNPALevelArch15)] >; @@ -484,7 +484,7 @@ def replaceONNXReduceMaxPattern : Pat< def replaceONNXReduceMinPattern : Pat< (ONNXReduceMinOp:$res $data, $axes, $keepdims, $noop_with_empty_axes), (ZHighUnstickOp (ZHighReduceMinOp (ZHighStickOp:$s_x $data, (NoneLayoutAttr), - (GetDefaultSaturation)), (GetStringAttr<"REDUCE_OP_MINIMUM">))), + (GetDefaultSaturation)))), [(IsCompatibleWithNNPALevelArch15)] >; //===----------------------------------------------------------------------===// diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp index 3cf8e2bd4f..2cdc850e02 100644 --- a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp +++ b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp @@ -1123,8 +1123,7 @@ struct ZHighToZLowReduceOpLowering : public ConversionPattern { Value data = operands[0]; // Helper builders. - MultiDialectBuilder + MultiDialectBuilder create(rewriter, loc); // Convert ZTensor type to MemRefType. @@ -1132,24 +1131,26 @@ struct ZHighToZLowReduceOpLowering : public ConversionPattern { convertZTensorToMemRefType(*op->result_type_begin()); // Shape helper. 
- ZHighReduceMaxOpShapeHelper shapeHelper(op, operands, &create.krnlIE); + ZHighReductionOpShapeHelper shapeHelper( + op, operands, &create.krnlIE); shapeHelper.computeShapeAndAssertOnFailure(); - SmallVector &dims = shapeHelper.getOutputDims(); // Allocate a buffer for the result MemRef. Value alloc = insertAllocForZMemRef( zMemRefType, shapeHelper.getOutputDims(), op, rewriter); // Get the original shape before it is vanished by lower passes. - Value shape = insertShapeMemRefI64(rewriter, loc, dims); + DimsExpr dataDims; + create.krnlIE.getShapeAsDims(data, dataDims); + Value shape = insertShapeMemRefI64(rewriter, loc, dataDims); - // If set to NULL, the operation will determine, allocate and free storage - // automatically. - Value workArea = create.llvm.null(krnl::getI8PointerType(context)); + // Emit 'alloc' for work_area that is of 4K-aligned 8K bytes. + Value workArea = create.mem.alignedAlloc( + MemRefType::get({8 * 1024}, rewriter.getIntegerType(8)), gAlignment); // Emit a ZLow operation. 
- rewriter.create::Op>(loc, data, workArea, - shape, alloc, zMemRefType.layout, reduceOp.getOpTypeAttr()); + rewriter.create::Op>( + loc, data, workArea, shape, alloc, zMemRefType.layout); rewriter.replaceOp(op, alloc); return success(); } diff --git a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp index c29d18d056..2c3f8fa768 100644 --- a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp +++ b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp @@ -1152,18 +1152,18 @@ class ZLowLeakyReluLowering : public ConvertToLLVMPattern { }; template -API getReduceAPI() { - return API::NULL_API; +zdnn_reduce_ops getZDNNReduceOpType() { + return REDUCE_OP_MAXIMUM; } template <> -API getReduceAPI() { - return API::ZDNN_REDUCEMAX; +zdnn_reduce_ops getZDNNReduceOpType() { + return REDUCE_OP_MAXIMUM; } template <> -API getReduceAPI() { - return API::ZDNN_REDUCEMIN; +zdnn_reduce_ops getZDNNReduceOpType() { + return REDUCE_OP_MINIMUM; } template @@ -1210,8 +1210,8 @@ class ZLowReduceLowering : public ConvertToLLVMPattern { Value one = create.llvm.constant(llvmI64Ty, static_cast(1)); // Calculation for the output dimension - int64_t axis = dims.size(); - std::vector outputDims; + int64_t axis = dims.size() - 1; + SmallVector outputDims; for (int64_t i = 0; i < axis; ++i) { outputDims.emplace_back(dims[i]); } @@ -1236,20 +1236,13 @@ class ZLowReduceLowering : public ConvertToLLVMPattern { zTensorHelper.getAlignedI8Ptr(operandAdaptor.getWorkArea()); // op_type - nnpa_reduce_operations opType; - StringRef opTypeStr = reduceOp.getOpType(); - if (opTypeStr.equals_insensitive("REDUCE_OP_MINIMUM")) - opType = NNPA_REDUCE_OP_MINIMUM; - else if (opTypeStr.equals_insensitive("REDUCE_OP_MAXIMUM")) - opType = NNPA_REDUCE_OP_MAXIMUM; - else - llvm_unreachable("Unsupported operation type"); - Value optype = create.llvm.constant( - rewriter.getI64Type(), static_cast(opType)); + zdnn_reduce_ops zdnnOpType = 
getZDNNReduceOpType(); + Value opType = create.llvm.constant( + rewriter.getI64Type(), static_cast(zdnnOpType)); // Call the zDNN ReduceMax/ReduceMin API. - callApi(rewriter, loc, module, apiRegistry, getReduceAPI(), - {toOpaquePtr(rewriter, loc, module, inputZTensor.val), workArea, optype, + callApi(rewriter, loc, module, apiRegistry, API::ZDNN_REDUCE, + {toOpaquePtr(rewriter, loc, module, inputZTensor.val), workArea, opType, toOpaquePtr(rewriter, loc, module, outputZTensor.val)}); rewriter.eraseOp(op); diff --git a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.cpp b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.cpp index 86f4b43bbb..114c19d618 100644 --- a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.cpp +++ b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.cpp @@ -69,8 +69,7 @@ ApiRegistry RegisterAllApis(MLIRContext *context) { ApiSpec(API::ZDNN_LOG, "zdnn_log_ext", int32Ty, {opaquePtrTy, opaquePtrTy}, false), ApiSpec(API::ZDNN_EXP, "zdnn_exp_ext", int32Ty, {opaquePtrTy, opaquePtrTy}, false), ApiSpec(API::ZDNN_INVSQRT, "zdnn_invsqrt_ext", int32Ty, {opaquePtrTy, float32Ty, opaquePtrTy}, false), - ApiSpec(API::ZDNN_REDUCEMAX, "zdnn_reduce_ext", int32Ty, {opaquePtrTy, opaquePtrTy, int64Ty, opaquePtrTy}, false), - ApiSpec(API::ZDNN_REDUCEMIN, "zdnn_reduce_ext", int32Ty, {opaquePtrTy, opaquePtrTy, int64Ty, opaquePtrTy}, false), + ApiSpec(API::ZDNN_REDUCE, "zdnn_reduce_ext", int32Ty, {opaquePtrTy, opaquePtrTy, int64Ty, opaquePtrTy}, false), // Activation operations ApiSpec(API::ZDNN_LEAKY_RELU, "zdnn_leaky_relu_ext", int32Ty, {opaquePtrTy, opaquePtrTy, float32Ty, opaquePtrTy}, false), ApiSpec(API::ZDNN_RELU, "zdnn_relu_ext", int32Ty, {opaquePtrTy, opaquePtrTy, opaquePtrTy}, false), diff --git a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.hpp b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.hpp index 8faa68ee8d..fc427e5c87 100644 --- 
a/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.hpp +++ b/src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVMCommon.hpp @@ -48,8 +48,9 @@ enum class API { ZDNN_LOG, ZDNN_EXP, ZDNN_INVSQRT, - ZDNN_REDUCEMAX, - ZDNN_REDUCEMIN, + // Reduction operations + ZDNN_REDUCE, + ZDNN_MEANREDUCE2D, // Activation operations ZDNN_RELU, ZDNN_GELU, @@ -68,7 +69,6 @@ enum class API { ZDNN_CONV2D, ZDNN_AVGPOOL2D, ZDNN_MAXPOOL2D, - ZDNN_MEANREDUCE2D, ZDNN_BATCHNORM, ZDNN_LEAKY_RELU, // Scalar operations. diff --git a/src/Accelerators/NNPA/Dialect/ZHigh/ZHigh.td b/src/Accelerators/NNPA/Dialect/ZHigh/ZHigh.td index 6f80f12e66..7bbcd02c87 100644 --- a/src/Accelerators/NNPA/Dialect/ZHigh/ZHigh.td +++ b/src/Accelerators/NNPA/Dialect/ZHigh/ZHigh.td @@ -654,14 +654,13 @@ def ZHighReduceMaxOp:ZHigh_Op<"ReduceMax", [Pure, SameOperandsAndResultLayout, ZHigh operation to perform a ReduceMax. op_type: REDUCE_OP_MAXIMUM or REDUCE_OP_MINIMUM. }]; - let arguments = (ins AnyTypeOf<[AnyZTensor]>:$data, - DefaultValuedStrAttr:$op_type); + let arguments = (ins AnyTypeOf<[AnyZTensor]>:$data); let results = (outs AnyTypeOf<[AnyZTensor]>:$output); let builders = [ - OpBuilder<(ins "::mlir::Value":$data, "::mlir::StringAttr":$op_type), [{ + OpBuilder<(ins "::mlir::Value":$data), [{ Type elementType = mlir::cast(data.getType()).getElementType(); UnrankedTensorType resType = UnrankedTensorType::get(elementType); - build($_builder, $_state, resType, data, op_type); + build($_builder, $_state, resType, data); }]> ]; let extraClassDefinition = [{ @@ -682,14 +681,13 @@ def ZHighReduceMinOp:ZHigh_Op<"ReduceMin", [Pure, SameOperandsAndResultLayout, ZHigh operation to perform a ReduceMin. op_type: REDUCE_OP_MAXIMUM or REDUCE_OP_MINIMUM. 
}]; - let arguments = (ins AnyTypeOf<[AnyZTensor]>:$data, - DefaultValuedStrAttr:$op_type); + let arguments = (ins AnyTypeOf<[AnyZTensor]>:$data); let results = (outs AnyTypeOf<[AnyZTensor]>:$output); let builders = [ - OpBuilder<(ins "::mlir::Value":$data, "::mlir::StringAttr":$op_type), [{ + OpBuilder<(ins "::mlir::Value":$data), [{ Type elementType = mlir::cast(data.getType()).getElementType(); UnrankedTensorType resType = UnrankedTensorType::get(elementType); - build($_builder, $_state, resType, data, op_type); + build($_builder, $_state, resType, data); }]> ]; let extraClassDefinition = [{ diff --git a/src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps/Reduction/Reduction.cpp b/src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps/Reduction/Reduction.cpp index 91e96a5a62..02daf76ad0 100644 --- a/src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps/Reduction/Reduction.cpp +++ b/src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps/Reduction/Reduction.cpp @@ -29,9 +29,6 @@ LogicalResult ZHighReductionOpShapeHelper::computeShape() { // Get operand. Value data = operandAdaptor.getData(); - // Get Rank - int64_t rank = createIE->getShapedTypeRank(data); - // Output dims of result. DimsExpr outputDims; @@ -39,15 +36,15 @@ LogicalResult ZHighReductionOpShapeHelper::computeShape() { SmallVector inputDims; createIE->getShapeAsDims(data, inputDims); - int64_t axis = rank - 1; + // NNPA only supports reduction over the innermost dimension. + // So set the innermost dimension of the output to one. + int64_t axis = inputDims.size() - 1; LiteralIndexExpr one(1); // Copy the input until the second to last dimension for (int64_t i = 0; i < axis; ++i) { outputDims.emplace_back(inputDims[i]); } - // The innermost dimension or last dimension needs to be reduced to one - outputDims.emplace_back( - one); // NNPA is always true for keepdims so we will reduce the dimension + outputDims.emplace_back(one); // Save the final result. 
setOutputDims(outputDims); diff --git a/src/Accelerators/NNPA/Dialect/ZLow/ZLow.td b/src/Accelerators/NNPA/Dialect/ZLow/ZLow.td index 366252f6b7..a66cb8273f 100644 --- a/src/Accelerators/NNPA/Dialect/ZLow/ZLow.td +++ b/src/Accelerators/NNPA/Dialect/ZLow/ZLow.td @@ -259,8 +259,7 @@ def ZLowReduceMaxOp:ZLow_Op<"reducemax", [MemRefsNormalizable]> { MemRefOf<[I8]>:$work_area, MemRefOf<[I64]>:$shape, ZMemRef:$Out, - StrAttr:$layout, - StrAttr:$op_type); + StrAttr:$layout); } def ZLowReduceMinOp:ZLow_Op<"reducemin", [MemRefsNormalizable]> { @@ -272,8 +271,7 @@ def ZLowReduceMinOp:ZLow_Op<"reducemin", [MemRefsNormalizable]> { MemRefOf<[I8]>:$work_area, MemRefOf<[I64]>:$shape, ZMemRef:$Out, - StrAttr:$layout, - StrAttr:$op_type); + StrAttr:$layout); } def ZLowMatMulOp:ZLow_Op<"matmul", [MemRefsNormalizable, diff --git a/test/accelerators/NNPA/backend/CMakeLists.txt b/test/accelerators/NNPA/backend/CMakeLists.txt index fb5ef89904..272be114d2 100644 --- a/test/accelerators/NNPA/backend/CMakeLists.txt +++ b/test/accelerators/NNPA/backend/CMakeLists.txt @@ -438,31 +438,14 @@ set(NNPA_TEST_LIST_ARCH_15 # ==OP== ReduceMax # ==LEVEL== arch15 # ==MIN== 1 - # ==LIM== - We do no support `do_not_keepdims` backend tests. - # test_reduce_max_bool_inputs_cpu,zdnn_reduce_ext - test_reduce_max_default_axes_keepdim_example_cpu,zdnn_reduce_ext - test_reduce_max_default_axes_keepdims_random_cpu,zdnn_reduce_ext - # test_reduce_max_do_not_keepdims_example_cpu,zdnn_reduce_ext - # test_reduce_max_do_not_keepdims_random_cpu,zdnn_reduce_ext - test_reduce_max_keepdims_example_cpu,zdnn_reduce_ext - test_reduce_max_keepdims_random_cpu,zdnn_reduce_ext - test_reduce_max_negative_axes_keepdims_example_cpu,zdnn_reduce_ext - test_reduce_max_negative_axes_keepdims_random_cpu,zdnn_reduce_ext + # ==LIM== - We do not support `do_not_keepdims` backend tests. Only support reduction over the innermost dimension. + # Currently, there is no backend test in ONNX that does reduction on the innermost dimension. 
# ==OP== ReduceMin # ==LEVEL== arch15 # ==MIN== 1 - # ==LIM== - We do no support `do_not_keepdims` backend tests. - # test_reduce_min_bool_inputs_cpu,zdnn_reduce_ext - test_reduce_min_default_axes_keepdims_example_cpu,zdnn_reduce_ext - test_reduce_min_default_axes_keepdims_random_cpu,zdnn_reduce_ext - # test_reduce_min_do_not_keepdims_example_cpu,zdnn_reduce_ext - # test_reduce_min_do_not_keepdims_random_cpu,zdnn_reduce_ext - test_reduce_min_empty_set_cpu,zdnn_reduce_ext - test_reduce_min_keepdims_example_cpu,zdnn_reduce_ext - test_reduce_min_keepdims_random_cpu,zdnn_reduce_ext - test_reduce_min_negative_axes_keepdims_example_cpu,zdnn_reduce_ext - test_reduce_min_negative_axes_keepdims_random_cpu,zdnn_reduce_ext + # ==LIM== - We do not support `do_not_keepdims` backend tests. Only support reduction over the innermost dimension. + # Currently, there is no backend test in ONNX that does reduction on the innermost dimension. # ==OP== Sqrt # ==LEVEL== arch15 diff --git a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemax.mlir b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemax.mlir index 2b3bdd7f68..77c6052512 100644 --- a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemax.mlir +++ b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemax.mlir @@ -1,17 +1,75 @@ -// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --shape-inference --convert-onnx-to-zhigh %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --shape-inference --convert-onnx-to-zhigh --canonicalize %s -split-input-file | FileCheck %s - func.func @test_reduce_max_axes_defined_noop_0(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { - %cst = "onnx.Constant"() {value = dense<[2]> : tensor<1xi64> } : () -> tensor<1xi64> - %0 ="onnx.ReduceMax"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> - "func.return"(%0) : (tensor<*xf32>) -> () +func.func 
@test_reduce_max_axes_defined_noop_0(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<[2]> : tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMax"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () // mlir2FileCheck.py // CHECK-LABEL: func.func @test_reduce_max_axes_defined_noop_0 // CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<3x2x1xf32> { -// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<2> : tensor<1xi64> -// CHECK-DAG: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<3x2x2xf32>) -> tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMax"([[VAR_1_]]) {op_type = "REDUCE_OP_MAXIMUM"} : (tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> +// CHECK: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<3x2x2xf32>) -> tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>> +// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMax"([[VAR_1_]]) : (tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> // CHECK: [[VAR_3_:%.+]] = "zhigh.Unstick"([[VAR_2_]]) : (tensor<*xf16>) -> tensor<3x2x1xf32> // CHECK: return [[VAR_3_]] : tensor<3x2x1xf32> // CHECK: } - } \ No newline at end of file +} + +// ----- + +func.func @test_reduce_max_axes_minus_one(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<-1> : tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMax"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_max_axes_minus_one +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<3x2x1xf32> { +// CHECK: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<3x2x2xf32>) 
-> tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>> +// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMax"([[VAR_1_]]) : (tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> +// CHECK: [[VAR_3_:%.+]] = "zhigh.Unstick"([[VAR_2_]]) : (tensor<*xf16>) -> tensor<3x2x1xf32> +// CHECK: return [[VAR_3_]] : tensor<3x2x1xf32> +// CHECK: } +} + +// ----- + +func.func @test_reduce_max_not_lowered_unknown_axis(%arg0 : tensor<3x2x2xf32>, %arg1: tensor<1xi64>) -> tensor<*xf32> { + %0 ="onnx.ReduceMax"(%arg0, %arg1) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_max_not_lowered_unknown_axis +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor { +// CHECK: [[VAR_0_:%.+]] = "onnx.ReduceMax"([[PARAM_0_]], [[PARAM_1_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>) -> tensor +// CHECK: return [[VAR_0_]] : tensor +// CHECK: } +} + +// ----- + +func.func @test_reduce_max_axes_not_lowered_not_innermost_axis(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<0> : tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMax"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_max_axes_not_lowered_not_innermost_axis +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<1x2x2xf32> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<0> : tensor<1xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.ReduceMax"([[PARAM_0_]], [[VAR_0_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>) -> tensor<1x2x2xf32> +// CHECK: return [[VAR_1_]] : tensor<1x2x2xf32> +// CHECK: } +} + +// ----- + +func.func 
@test_reduce_max_axes_not_lowered_not_multiple_axes(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<[2, 0]> : tensor<2xi64> } : () -> tensor<2xi64> + %0 ="onnx.ReduceMax"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<2xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_max_axes_not_lowered_not_multiple_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<1x2x1xf32> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<[2, 0]> : tensor<2xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.ReduceMax"([[PARAM_0_]], [[VAR_0_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<2xi64>) -> tensor<1x2x1xf32> +// CHECK: return [[VAR_1_]] : tensor<1x2x1xf32> +// CHECK: } +} diff --git a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemin.mlir b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemin.mlir index 6bbd5ac19f..74c825a85c 100644 --- a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemin.mlir +++ b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/reducemin.mlir @@ -1,17 +1,75 @@ -// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --shape-inference --convert-onnx-to-zhigh %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --shape-inference --convert-onnx-to-zhigh --canonicalize %s -split-input-file | FileCheck %s - func.func @test_reduce_min_axes_defined_noop_0(%arg0: tensor<1x2x4xf32>) -> tensor<*xf32> { - %0 = "onnx.Constant"() {value = dense<[2]> : tensor<1xi64> } : () -> tensor<1xi64> - %1 ="onnx.ReduceMin"(%arg0, %0) {keepdims = 1: si64, noop_with_empty_axes = 0: si64} : (tensor<1x2x4xf32>, tensor<1xi64>) -> tensor<*xf32> - "func.return"(%1) : (tensor<*xf32>) -> () +func.func @test_reduce_min_axes_defined_noop_0(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<[2]> : 
tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMin"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () // mlir2FileCheck.py // CHECK-LABEL: func.func @test_reduce_min_axes_defined_noop_0 -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x2x4xf32>) -> tensor<1x2x1xf32> { -// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<2> : tensor<1xi64> -// CHECK-DAG: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<1x2x4xf32>) -> tensor<1x2x4xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMin"([[VAR_1_]]) {op_type = "REDUCE_OP_MINIMUM"} : (tensor<1x2x4xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> -// CHECK: [[VAR_3_:%.+]] = "zhigh.Unstick"([[VAR_2_]]) : (tensor<*xf16>) -> tensor<1x2x1xf32> -// CHECK: return [[VAR_3_]] : tensor<1x2x1xf32> +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<3x2x1xf32> { +// CHECK: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<3x2x2xf32>) -> tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>> +// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMin"([[VAR_1_]]) : (tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> +// CHECK: [[VAR_3_:%.+]] = "zhigh.Unstick"([[VAR_2_]]) : (tensor<*xf16>) -> tensor<3x2x1xf32> +// CHECK: return [[VAR_3_]] : tensor<3x2x1xf32> // CHECK: } -} \ No newline at end of file +} + +// ----- + +func.func @test_reduce_min_axes_minus_one(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<-1> : tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMin"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_min_axes_minus_one +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<3x2x1xf32> { +// 
CHECK: [[VAR_1_:%.+]] = "zhigh.Stick"([[PARAM_0_]]) {layout = "3DS"} : (tensor<3x2x2xf32>) -> tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>> +// CHECK: [[VAR_2_:%.+]] = "zhigh.ReduceMin"([[VAR_1_]]) : (tensor<3x2x2xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<*xf16> +// CHECK: [[VAR_3_:%.+]] = "zhigh.Unstick"([[VAR_2_]]) : (tensor<*xf16>) -> tensor<3x2x1xf32> +// CHECK: return [[VAR_3_]] : tensor<3x2x1xf32> +// CHECK: } +} + +// ----- + +func.func @test_reduce_min_not_lowered_unknown_axis(%arg0 : tensor<3x2x2xf32>, %arg1: tensor<1xi64>) -> tensor<*xf32> { + %0 ="onnx.ReduceMin"(%arg0, %arg1) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_min_not_lowered_unknown_axis +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor { +// CHECK: [[VAR_0_:%.+]] = "onnx.ReduceMin"([[PARAM_0_]], [[PARAM_1_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>) -> tensor +// CHECK: return [[VAR_0_]] : tensor +// CHECK: } +} + +// ----- + +func.func @test_reduce_min_axes_not_lowered_not_innermost_axis(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<0> : tensor<1xi64> } : () -> tensor<1xi64> + %0 ="onnx.ReduceMin"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_min_axes_not_lowered_not_innermost_axis +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<1x2x2xf32> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<0> : tensor<1xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.ReduceMin"([[PARAM_0_]], [[VAR_0_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<1xi64>) -> tensor<1x2x2xf32> +// CHECK: return 
[[VAR_1_]] : tensor<1x2x2xf32> +// CHECK: } +} + +// ----- + +func.func @test_reduce_min_axes_not_lowered_not_multiple_axes(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %cst = "onnx.Constant"() {value = dense<[2, 0]> : tensor<2xi64> } : () -> tensor<2xi64> + %0 ="onnx.ReduceMin"(%arg0, %cst) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<2xi64>)-> tensor<*xf32> + "func.return"(%0) : (tensor<*xf32>) -> () + +// CHECK-LABEL: func.func @test_reduce_min_axes_not_lowered_not_multiple_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<3x2x2xf32>) -> tensor<1x2x1xf32> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<[2, 0]> : tensor<2xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.ReduceMin"([[PARAM_0_]], [[VAR_0_]]) {keepdims = 1 : si64, noop_with_empty_axes = 0 : si64} : (tensor<3x2x2xf32>, tensor<2xi64>) -> tensor<1x2x1xf32> +// CHECK: return [[VAR_1_]] : tensor<1x2x1xf32> +// CHECK: } +} diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemax.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemax.mlir index 83393283cf..06c390c17e 100644 --- a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemax.mlir +++ b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemax.mlir @@ -4,13 +4,22 @@ func.func @reduce_max_axes_defined_noop_0(%arg0: tensor<3x4x5xf16, #zhigh.layout %0 = "zhigh.ReduceMax"(%arg0) : (tensor<3x4x5xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> return %0 : tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// mlir2FileCheck.py // CHECK-DAG: [[MAP_0_:#.+]] = affine_map<(d0, d1, d2) -> (d0, d2 floordiv 64, 0, d1 floordiv 32, d1 mod 32, d2 mod 64)> // CHECK-LABEL: func.func @reduce_max_axes_defined_noop_0 // CHECK-SAME: ([[PARAM_0_:%.+]]: memref<3x4x5xf16, #map>) -> memref<3x4x1xf16, #map> { -// CHECK: [[VAR_0_:%.+]] = builtin.unrealized_conversion_cast [[PARAM_0_]] : memref<3x4x5xf16, #map> to tensor<3x4x5xf16, 
#zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_1_:%.+]] = "zhigh.ReduceMax"([[VAR_0_]]) {op_type = "REDUCE_OP_MAXIMUM"} : (tensor<3x4x5xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_2_:%.+]] = builtin.unrealized_conversion_cast [[VAR_1_]] : tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> to memref<3x4x1xf16, #map> -// CHECK: return [[VAR_2_]] : memref<3x4x1xf16, #map> +// CHECK-DAG: [[CST_5_:%.+]] = arith.constant 5 : i64 +// CHECK-DAG: [[CST_4_:%.+]] = arith.constant 4 : i64 +// CHECK-DAG: [[CST_3_:%.+]] = arith.constant 3 : i64 +// CHECK-DAG: [[CST_2_:%.+]] = arith.constant 2 : index +// CHECK-DAG: [[CST_0_:%.+]] = arith.constant 0 : index +// CHECK-DAG: [[CST_1_:%.+]] = arith.constant 1 : index +// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<3x4x1xf16, #map> +// CHECK-DAG: [[RES_1_:%.+]] = memref.alloc() {{.*}}: memref<3xi64> +// CHECK: krnl.store [[CST_3_]], [[RES_1_]]{{.}}[[CST_0_]]{{.}} : memref<3xi64> +// CHECK: krnl.store [[CST_4_]], [[RES_1_]]{{.}}[[CST_1_]]{{.}} : memref<3xi64> +// CHECK: krnl.store [[CST_5_]], [[RES_1_]]{{.}}[[CST_2_]]{{.}} : memref<3xi64> +// CHECK: [[RES_2_:%.+]] = memref.alloc() {{.*}}: memref<8192xi8> +// CHECK: "zlow.reducemax"([[PARAM_0_]], [[RES_2_]], [[RES_1_]], [[RES_]]) {layout = "3DS"} : (memref<3x4x5xf16, #map>, memref<8192xi8>, memref<3xi64>, memref<3x4x1xf16, #map>) -> () +// CHECK: return [[RES_]] : memref<3x4x1xf16, #map> // CHECK: } } diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemin.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemin.mlir index 926c3f4ede..faf425537c 100644 --- a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemin.mlir +++ b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/reducemin.mlir @@ -4,13 +4,22 @@ func.func @reduce_min_axes_defined_noop_0(%arg0: tensor<3x4x5xf16, #zhigh.layout %0 = "zhigh.ReduceMin"(%arg0) : (tensor<3x4x5xf16, 
#zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> return %0 : tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// mlir2FileCheck.py // CHECK-DAG: [[MAP_0_:#.+]] = affine_map<(d0, d1, d2) -> (d0, d2 floordiv 64, 0, d1 floordiv 32, d1 mod 32, d2 mod 64)> // CHECK-LABEL: func.func @reduce_min_axes_defined_noop_0 // CHECK-SAME: ([[PARAM_0_:%.+]]: memref<3x4x5xf16, #map>) -> memref<3x4x1xf16, #map> { -// CHECK: [[VAR_0_:%.+]] = builtin.unrealized_conversion_cast [[PARAM_0_]] : memref<3x4x5xf16, #map> to tensor<3x4x5xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_1_:%.+]] = "zhigh.ReduceMin"([[VAR_0_]]) {op_type = "REDUCE_OP_MINIMUM"} : (tensor<3x4x5xf16, #zhigh.layout<{dataLayout = "3DS"}>>) -> tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> -// CHECK: [[VAR_2_:%.+]] = builtin.unrealized_conversion_cast [[VAR_1_]] : tensor<3x4x1xf16, #zhigh.layout<{dataLayout = "3DS"}>> to memref<3x4x1xf16, #map> -// CHECK: return [[VAR_2_]] : memref<3x4x1xf16, #map> +// CHECK-DAG: [[CST_5_:%.+]] = arith.constant 5 : i64 +// CHECK-DAG: [[CST_4_:%.+]] = arith.constant 4 : i64 +// CHECK-DAG: [[CST_3_:%.+]] = arith.constant 3 : i64 +// CHECK-DAG: [[CST_2_:%.+]] = arith.constant 2 : index +// CHECK-DAG: [[CST_0_:%.+]] = arith.constant 0 : index +// CHECK-DAG: [[CST_1_:%.+]] = arith.constant 1 : index +// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<3x4x1xf16, #map> +// CHECK-DAG: [[RES_1_:%.+]] = memref.alloc() {{.*}}: memref<3xi64> +// CHECK: krnl.store [[CST_3_]], [[RES_1_]]{{.}}[[CST_0_]]{{.}} : memref<3xi64> +// CHECK: krnl.store [[CST_4_]], [[RES_1_]]{{.}}[[CST_1_]]{{.}} : memref<3xi64> +// CHECK: krnl.store [[CST_5_]], [[RES_1_]]{{.}}[[CST_2_]]{{.}} : memref<3xi64> +// CHECK: [[RES_2_:%.+]] = memref.alloc() {{.*}}: memref<8192xi8> +// CHECK: "zlow.reducemin"([[PARAM_0_]], [[RES_2_]], [[RES_1_]], [[RES_]]) {layout = "3DS"} : (memref<3x4x5xf16, #map>, memref<8192xi8>, memref<3xi64>, memref<3x4x1xf16, 
#map>) -> () +// CHECK: return [[RES_]] : memref<3x4x1xf16, #map> // CHECK: } -} \ No newline at end of file +}