From 38447124c60c6de57035c004c53327ee0b209ee2 Mon Sep 17 00:00:00 2001 From: Jhalak Patel Date: Sun, 13 Oct 2024 15:03:11 -0700 Subject: [PATCH] Add plan dialect changes --- .../include/mlir-tensorrt/Compiler/Options.h | 4 + .../Compiler/StableHloToExecutable.h | 3 + .../mlir-tensorrt/Dialect/Plan/IR/PlanOps.td | 68 +++++- .../Dialect/Plan/Transforms/Passes.h | 3 +- .../Dialect/Plan/Transforms/Passes.td | 15 +- .../lib/Compiler/StableHloToExecutable.cpp | 24 +- .../TensorRTRuntimeToExecutor.cpp | 8 +- .../compiler/lib/Dialect/Plan/IR/PlanOps.cpp | 69 ++++++ .../Dialect/Plan/Transforms/AllocTensors.cpp | 11 +- .../Plan/Transforms/CreateClosedRegions.cpp | 214 +++++++++++------- .../Plan/Transforms/OutlineClusters.cpp | 92 +++++++- .../lib/Dialect/Plan/Transforms/Passes.cpp | 32 ++- .../Plan/Transforms/StablehloClustering.cpp | 2 +- .../lib/Conversion/MemRefToExecutor.cpp | 54 ++++- .../Lua/Modules/TensorRT/TensorRTModule.cpp | 3 - .../test/lib/BufferizationTestPass.cpp | 14 +- .../tensorrt-runtime-to-executor.mlir | 51 +++-- .../IntegrationTests/test_stablehlo_add.py | 77 ++++--- .../test_stablehlo_alloc_enqueue.py | 124 ---------- 19 files changed, 577 insertions(+), 291 deletions(-) delete mode 100644 mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/Options.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/Options.h index cd9c5e393..535ccefee 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/Options.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/Options.h @@ -34,6 +34,9 @@ namespace mlirtrt::compiler { /// DebugOptions are options that are common to different compiler API /// interfaces. struct DebugOptions { + /// Dump textual pipeline passes + bool dumpTextualPipeline = false; + /// A directory path where the IR will be dumped during compilation /// using the `mlir-print-ir-tree-dir` mechanism. 
std::string dumpIRPath = ""; @@ -49,6 +52,7 @@ struct DebugOptions { mlir::SmallVector llvmDebugTypes = {}; void addToOptions(mlir::OptionsContext &context) { + context.addOption("dump-textual-pipeline", dumpTextualPipeline); context.addOption("mlir-print-ir-tree-dir", dumpIRPath, llvm::cl::init("")); context.addOption("debug", enableLLVMDebugFlag); context.addList("debug-only", llvmDebugTypes, diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h index 8358ff0dd..8a6103b89 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h @@ -128,6 +128,9 @@ struct StableHLOToExecutableOptions : public mlir::OptionsContext { /// Whether to disallow host tensors in TensorRT clusters. bool disallowHostTensorsInTensorRTClusters = false; + /// Whether to use non-DPS style calling convention. + bool useNonDPSCallConv = false; + /// Entrypoint function name. 
std::string entrypoint = "main"; diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/IR/PlanOps.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/IR/PlanOps.td index e175a8ff7..8a091fb1a 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/IR/PlanOps.td +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/IR/PlanOps.td @@ -267,6 +267,72 @@ def Plan_InlineClosedGroupOp : Plan_GroupOpBase<"inline_closed_group", [ }]; } + +//===----------------------------------------------------------------------===// +// InlineClosedGroupNonDPSOp +//===----------------------------------------------------------------------===// + +def Plan_InlineClosedGroupNonDPSOp : Plan_GroupOpBase<"inline_closed_group_non_dps", [ + IsolatedFromAbove, + SingleBlockImplicitTerminator<"plan::YieldOp">, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods +]> { + let description = [{ + // TODO: Add explanation for non-DPS version for inline closed group op. + }]; + let arguments = (ins Variadic>:$inputs, + BoundsAttrArray:$input_attrs, + AnyAttr:$target); + + let results = (outs Variadic>:$results); + + let assemblyFormat = [{ + `target` `(` $target `)` `\n` + `inputs` `(` ( $inputs^ `:` type($inputs) `)` ) : ( `)` ) ? `\n` + `in_attrs` $input_attrs `\n` + attr-dict-with-keyword `->` type($results) + $body + }]; + + let hasVerifier = 1; + + let skipDefaultBuilders = 1; + + let builders = [ + OpBuilder<(ins "TypeRange":$results, + "Attribute":$target, + "ValueRange":$inputs, + CArg<"ArrayRef", "{}">:$input_attrs)>, + ]; + + let extraClassDeclaration = baseExtraClassDeclaration # [{ + + /// Returns true if the `i-th` input argument has a tensor type. + bool argHasTensorType(unsigned inputIdx) { + assert(inputIdx < getInputs().size() && "input index out-of-bounds"); + return isa(getInputs()[inputIdx].getType()); + } + + /// Returns the i-th input argument's bounds attribute.
+ BoundsAttr getInputBoundsAttr(unsigned inputIdx) { + assert(inputIdx < getInputs().size() && "input index out-of-bounds"); + return cast(getInputAttrs()[inputIdx]); + } + + /// Populate the `input_attrs` from an array of BoundsAttrs. + void setInputAttrsAttr(ArrayRef boundsAttrs) { + setInputAttrsAttr(::mlir::ArrayAttr::get( + getOperation()->getContext(), + ArrayRef(boundsAttrs.begin(), boundsAttrs.end()) + )); + } + + }]; +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// @@ -276,7 +342,7 @@ def Plan_YieldOp : Plan_Op<"yield", [ Terminator, ReturnLike, ParentOneOf<["plan::InlineGroupOp", - "plan::InlineClosedGroupOp"]>]> { + "plan::InlineClosedGroupOp", "plan::InlineClosedGroupNonDPSOp"]>]> { let arguments = (ins Variadic:$results); diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.h index 41429bb6b..b6725cdcd 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.h @@ -69,7 +69,8 @@ executorOneShotModuleBufferize(ModuleOp targetOp, const ExecutorBufferizationOptions &options); /// Build a pipeline (targeting ModuleOp) for bufferization. -void buildPlanBufferizationPipeline(OpPassManager &pm); +void buildPlanBufferizationPipeline( + OpPassManager &pm, const plan::PlanAllocTensorsPassOptions &options); /// Build a post-bufferization pipeline that performs optimizations on memrefs. 
void buildPlanBufferOptimizationPipeline(OpPassManager &pm); diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.td index 5f11974bc..e920df380 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.td +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Dialect/Plan/Transforms/Passes.td @@ -248,6 +248,9 @@ def StablehloClusteringPass : Pass<"stablehlo-clustering", "::mlir::ModuleOp"> { Option<"entrypoint", "entrypoint", "std::string", "\"\"", "the name of the entrypoint function; if empty then the clustering runs" " on all functions">, + Option<"useNonDPSCallConv", + "use-non-dps-call-conv", "bool", "false", + "allow tensorrt based output allocations using output allocator">, Option<"disallowHostTensorsInTensorRTClusters", "disallow-host-tensors-in-tensorrt-clusters", "bool", "false", "don't cluster host tensors in TensorRT clusters">, @@ -332,7 +335,10 @@ def CreateClosedRegionsPass : Pass<"plan-create-closed-regions", "::mlir::Module Option<"testPreWalkOrder", "test-pre-walk-order", "bool", "false", "(used only in testing) specifies to outline regions by walking in " " pre-order; used for verifying results are not sensitive " - "to traversal order"> + "to traversal order">, + Option<"useNonDPSCallConv", "use-non-dps-call-conv", "bool", + /*default=*/"false", + "Allow TensorRT-based output allocations using output allocator"> ]; let dependentDialects = [ @@ -428,6 +434,13 @@ def PlanAllocTensorsPass : Pass<"plan-alloc-tensors", "::mlir::bufferization::BufferizationDialect", "::mlir::plan::PlanDialect" ]; + + let options = [ + Option<"useNonDPSCallConv", "use-non-dps-call-conv", "bool", + /*default=*/"false", + "Allow TensorRT-based output allocations using output allocator"> + ]; + } //===----------------------------------------------------------------------===// diff --git 
a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp index 0317514d6..47d986e0f 100644 --- a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp +++ b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp @@ -222,6 +222,9 @@ StableHLOToExecutableOptions::StableHLOToExecutableOptions( disallowHostTensorsInTensorRTClusters, llvm::cl::init(false), llvm::cl::desc("Don't allow TensorRt clusters to contain host tensor " "calculations (but they can still be inputs)")); + addOption("use-non-dps-call-conv", useNonDPSCallConv, + llvm::cl::init(false), + llvm::cl::desc("allow tensorrt based output allocations using output allocator")); addOption("executor-index-bitwidth", executorIndexBitwidth, llvm::cl::init(64)); addOption("device-compute-capability", deviceComputeCapability, @@ -303,6 +306,7 @@ void StableHloToExecutableTask::buildStablehloClusteringPipeline( plan::StablehloClusteringPassOptions clusteringOpts{}; clusteringOpts.disallowHostTensorsInTensorRTClusters = opts.disallowHostTensorsInTensorRTClusters; + clusteringOpts.useNonDPSCallConv = opts.useNonDPSCallConv; clusteringOpts.entrypoint = opts.entrypoint; plan::buildPlanSegmentationPipeline(pm, clusteringOpts); @@ -336,7 +340,9 @@ void StableHloToExecutableTask::buildPostClusteringPipeline( // Perform bufferization. 
pm.addPass(createMemRefCastEliminationPass()); - pm.addPass(plan::createPlanAllocTensorsPass()); + plan::PlanAllocTensorsPassOptions allocTensorsOpts{}; + allocTensorsOpts.useNonDPSCallConv = opts.useNonDPSCallConv; + pm.addPass(plan::createPlanAllocTensorsPass(allocTensorsOpts)); pm.addPass(plan::createPlanBufferizePass()); pm.addPass(createMemRefCastEliminationPass()); pm.addPass(createCanonicalizerPass()); @@ -485,13 +491,14 @@ StableHloToExecutableTask::compileStableHLOToExecutable( runner = pm.get(); } - runner->printAsTextualPipeline(llvm::dbgs()); + if (options.debugOptions.dumpTextualPipeline) + runner->printAsTextualPipeline(llvm::dbgs()); // Setup pass manager - // if (failed(runner->run(module))) - // return getInternalErrorStatus( - // "failed to run compilation on module with symbol name: {0}", - // module.getName() ? *module.getName() : "no-symbol-name"); + if (failed(runner->run(module))) + return getInternalErrorStatus( + "failed to run compilation on module with symbol name: {0}", + module.getName() ? 
*module.getName() : "no-symbol-name"); // Translate to Runtime Executable FailureOr> exeStorage = @@ -524,6 +531,10 @@ struct ClusteringPipelineCliOpts *this, "device-compute-capability", llvm::cl::desc("target device compute capability (SM version)"), llvm::cl::init(60)}; + Option useNonDPSCallConv{ + *this, "use-non-dps-call-conv", + llvm::cl::desc("allow tensorrt based output allocations using output allocator"), + llvm::cl::init(false)}; Option deviceMaxSharedMemoryPerBlockKb{ *this, "device-max-smem-per-block", llvm::cl::desc("max shared memory per block (in kilobytes)"), @@ -551,6 +562,7 @@ static StableHLOToExecutableOptions populateStablehloClusteringPipelineOpts( opts.deviceComputeCapability = cliOpts.deviceComputeCapability; opts.deviceMaxSharedMemoryPerBlockKb = cliOpts.deviceMaxSharedMemoryPerBlockKb; + opts.useNonDPSCallConv = cliOpts.useNonDPSCallConv; opts.shouldInferDeviceOptionsFromHost = cliOpts.inferDeviceOptionsFromHost; opts.entrypoint = cliOpts.entrypoint; return opts; diff --git a/mlir-tensorrt/compiler/lib/Conversion/TensorRTRuntimeToExecutor/TensorRTRuntimeToExecutor.cpp b/mlir-tensorrt/compiler/lib/Conversion/TensorRTRuntimeToExecutor/TensorRTRuntimeToExecutor.cpp index a6778c51a..8c92e26a6 100644 --- a/mlir-tensorrt/compiler/lib/Conversion/TensorRTRuntimeToExecutor/TensorRTRuntimeToExecutor.cpp +++ b/mlir-tensorrt/compiler/lib/Conversion/TensorRTRuntimeToExecutor/TensorRTRuntimeToExecutor.cpp @@ -263,7 +263,7 @@ struct ConvertEnqueueAllocToCall ImplicitLocOpBuilder b(op.getLoc(), rewriter); // Function name for the enqueue alloc operation - std::string funcName = "_trtrt_alloc_enqueue"; + std::string funcName = "_trtrt_enqueue_alloc"; // Create new operands for the call op SmallVector newOperands = {adaptor.getExecutionContext(), @@ -429,8 +429,10 @@ struct ConvertEnqueueAllocToCall resultRange.append(shapes.begin(), shapes.end()); resultRange.append(strides.begin(), strides.end()); - Value result = b.create(executor::TableType::get( - 
b.getContext(), llvm::to_vector(TypeRange(resultRange)))); + Value result = b.create( + executor::TableType::get(b.getContext(), + llvm::to_vector(TypeRange(resultRange))), + resultRange); results.push_back(result); } diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/IR/PlanOps.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/IR/PlanOps.cpp index f8c6ecf5f..27c1dd884 100644 --- a/mlir-tensorrt/compiler/lib/Dialect/Plan/IR/PlanOps.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/IR/PlanOps.cpp @@ -465,6 +465,75 @@ void InlineClosedGroupOp::build(OpBuilder &b, OperationState &state, state.addTypes(TypeRange(outs)); } +//===----------------------------------------------------------------------===// +// InlineClosedGroupNonDPSOp +//===----------------------------------------------------------------------===// + +LogicalResult InlineClosedGroupNonDPSOp::verify() { + SmallVector inputAttrs = + llvm::to_vector(getInputAttrs().getAsRange()); + if (inputAttrs.size() != getInputs().size()) + return emitOpError("expected number of inputs (") + << getInputs().size() + << " to equal the number of input_attrs BoundsAttrs (" + << inputAttrs.size() << ")"; + + for (auto [idx, type] : llvm::enumerate(TypeRange(getInputs()))) { + BoundsAttr boundsAttr = inputAttrs[idx]; + if (failed(verifyBoundsAttr("input argument", idx, type, boundsAttr, + [&]() { return emitOpError(); }))) + return failure(); + } + + return success(); +} + +void InlineClosedGroupNonDPSOp::getSuccessorRegions( + RegionBranchPoint point, SmallVectorImpl ®ions) { + // If the predecessor is the InlineClosedGroupOp, branch into the body. + if (point.isParent()) { + regions.push_back(RegionSuccessor(&getBody(), getBody().getArguments())); + return; + } + + // Otherwise, the region branches back to the parent operation. 
+ regions.push_back(RegionSuccessor(getResults())); +} + +OperandRange +InlineClosedGroupNonDPSOp::getEntrySuccessorOperands(RegionBranchPoint point) { + return getOperands(); +} + +void InlineClosedGroupNonDPSOp::getAsmBlockArgumentNames( + Region ®ion, OpAsmSetValueNameFn setNameFn) { + assert(region.front().getNumArguments() == getInputs().size() && + "expected one block arg for each input argument"); + for (BlockArgument arg : region.front().getArguments()) { + setNameFn(arg, "in"); + } +} + +void InlineClosedGroupNonDPSOp::build(OpBuilder &b, OperationState &state, TypeRange resultTypes, + Attribute target, ValueRange inputs, + ArrayRef input_attrs) { + state.addTypes(resultTypes); + state.addOperands(inputs); + state.getOrAddProperties().target = target; + state.getOrAddProperties().setInputAttrs(b.getArrayAttr( + SmallVector(input_attrs.begin(), input_attrs.end()))); + Region *body = state.addRegion(); + auto getLocs = [](ValueRange r) { + SmallVector locs; + locs.reserve(r.size()); + for (Value v : r) + locs.push_back(v.getLoc()); + return locs; + }; + (void)body->emplaceBlock(); + body->addArguments(TypeRange(inputs), getLocs(inputs)); +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/AllocTensors.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/AllocTensors.cpp index 8437bdeda..458451b29 100644 --- a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/AllocTensors.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/AllocTensors.cpp @@ -856,11 +856,14 @@ class AllocTensorsPass } } - // First rewrite public functions to conform to DPS style. 
IRRewriter rewriter(ctx); - if (failed(rewriteNotPrivateFuncsToDPS(rewriter, op))) { - op->emitError("Failed to convert non-private functions to DPS"); - return signalPassFailure(); + + if (!useNonDPSCallConv) { + // First rewrite public functions to conform to DPS style. + if (failed(rewriteNotPrivateFuncsToDPS(rewriter, op))) { + op->emitError("Failed to convert non-private functions to DPS"); + return signalPassFailure(); + } } // Rewrite SCF for and while loop bodies for better bufferization results, diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/CreateClosedRegions.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/CreateClosedRegions.cpp index f939940cd..fe6910d23 100644 --- a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/CreateClosedRegions.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/CreateClosedRegions.cpp @@ -351,70 +351,11 @@ static void remapAnalysisState(DataFlowSolver &solver, ValueRange originals, } } -static LogicalResult createClosedGroupOp(RewriterBase &rewriter, - plan::InlineGroupOp op, - DataFlowSolver &solver) { - OpBuilder::InsertionGuard g(rewriter); - Location loc = op.getLoc(); - - // Materialize the destination operands. - SmallVector destinationOperands; - destinationOperands.reserve(op.getNumResults()); - for (OpResult res : op->getOpResults()) { - FailureOr destResult = - materializeDestinationOperand(rewriter, res.getLoc(), op, - res.getResultNumber(), solver); - if (failed(destResult)) - return emitError(res.getLoc()) - << "failed to materialize destination operand of type " - << res.getType(); - destinationOperands.push_back(*destResult); - } - - // Make the region isolated from above. This captures the input operands. 
- SmallVector inputs = makeRegionIsolatedFromAbove( - rewriter, op.getRegion(), [&](Operation *producer) { - return shouldCloneProducer(producer, op.getRegion()); - }); - - rewriter.setInsertionPoint(op); - auto closedGroupOp = rewriter.create( - op.getLoc(), /*target=*/op.getTarget(), - /*inputs=*/inputs, - /*outs=*/ - llvm::map_to_vector(destinationOperands, - [](const auto &x) { return x.destinationOperand; })); - - rewriter.inlineBlockBefore( - &op.getRegion().front(), &closedGroupOp.getRegion().front(), - closedGroupOp.getRegion().front().end(), - closedGroupOp.getRegion().getArguments().take_front( - op.getRegion().getNumArguments())); - - SmallVector replacements; - replacements.reserve(destinationOperands.size()); - for (auto [newResult, destOperand, originalType] : - llvm::zip(closedGroupOp->getResults(), destinationOperands, - op->getResultTypes())) { - if (destOperand.strategy == - DestinationOperandMaterializationStrategy::ExactShape) { - replacements.push_back(newResult); - continue; - } - - assert(destOperand.exactShape && - "expected materialized shape values to be provided for " - "this materialization strategy"); - - replacements.push_back(newResult); - } - - // Since we are about to replace values that may be inputs to other regions - // ops, we need to update the solver to populate the replacement TensorKind - // information. - remapAnalysisState(solver, op->getResults(), replacements); - rewriter.replaceOp(op, replacements); +static FailureOr> +getInputAttributes(RewriterBase &rewriter, DataFlowSolver &solver, Location loc, + SmallVector const &inputs) { + // Compute input tensor kinds. 
SmallVector inputTensorKinds; inputTensorKinds.reserve(inputs.size()); for (auto [idx, input] : llvm::enumerate(inputs)) { @@ -425,30 +366,16 @@ static LogicalResult createClosedGroupOp(RewriterBase &rewriter, const TensorKindLattice *tensorKindLattice = solver.lookupState(input); if (!tensorKindLattice || tensorKindLattice->getValue().isUninitialized()) - return closedGroupOp->emitOpError("input operand #") - << idx << " of type " << input.getType() + return emitError(loc) + << ("input operand #") << idx << " of type " << input.getType() << " does not have a TensorKind associated with it"; inputTensorKinds.push_back(tensorKindLattice->getValue()); } - // Create the closed region result profile attrs. - SmallVector resultAttrs; - for (const DestinationOperandMaterializationResult &dest : - destinationOperands) { - auto boundsAttr = BoundsAttr::getChecked( - mlir::detail::getDefaultDiagnosticEmitFn(loc), rewriter.getContext(), - BoundsKind::Shape, ArrayRef(dest.constantShapeLowerBound), - ArrayRef(dest.constantShapeUpperBound)); - if (!boundsAttr) - return failure(); - resultAttrs.push_back(boundsAttr); - } - closedGroupOp.setResAttrsAttr(resultAttrs); - // Create the shape profile attributes for the inputs. 
SmallVector inputAttrs; inputAttrs.reserve(inputTensorKinds.size()); - for (auto [idx, input] : llvm::enumerate(closedGroupOp.getInputs())) { + for (auto [idx, input] : llvm::enumerate(inputs)) { auto tensorType = dyn_cast(input.getType()); if (!tensorType) { inputAttrs.push_back(BoundsAttr::get(rewriter.getContext())); @@ -459,7 +386,7 @@ static LogicalResult createClosedGroupOp(RewriterBase &rewriter, if (inputTensorKinds[idx].isHostVisible()) { const auto *lattice = solver.lookupState(input); if (!lattice) - return emitError(closedGroupOp.getLoc()) + return emitError(loc) << "host-visible input operand #" << idx << " of type " << input.getType() << " does not have value bounds information attached"; @@ -470,7 +397,7 @@ static LogicalResult createClosedGroupOp(RewriterBase &rewriter, auto [lbAttr, ubAttr] = bounds.getAsElementsAttr(tensorType); BoundsAttr boundsAttr = BoundsAttr::getChecked( - closedGroupOp.getLoc(), rewriter.getContext(), BoundsKind::Value, + loc, rewriter.getContext(), BoundsKind::Value, DenseI64ArrayAttr{}, DenseI64ArrayAttr{}, lbAttr, ubAttr); if (!boundsAttr) return failure(); @@ -503,8 +430,127 @@ static LogicalResult createClosedGroupOp(RewriterBase &rewriter, return failure(); inputAttrs.push_back(boundsAttr); } - closedGroupOp.setInputAttrsAttr(inputAttrs); + return inputAttrs; +} + +static LogicalResult createClosedGroupOp(RewriterBase &rewriter, + plan::InlineGroupOp op, + DataFlowSolver &solver, + bool useNonDPSCallConv) { + OpBuilder::InsertionGuard g(rewriter); + Location loc = op.getLoc(); + + SmallVector destinationOperands; + if (!useNonDPSCallConv) { + // Materialize the destination operands. 
+ destinationOperands.reserve(op.getNumResults()); + for (OpResult res : op->getOpResults()) { + FailureOr destResult = + materializeDestinationOperand(rewriter, res.getLoc(), op, + res.getResultNumber(), solver); + if (failed(destResult)) + return emitError(res.getLoc()) + << "failed to materialize destination operand of type " + << res.getType(); + destinationOperands.push_back(*destResult); + } + } + + // Make the region isolated from above. This captures the input operands. + SmallVector inputs = makeRegionIsolatedFromAbove( + rewriter, op.getRegion(), [&](Operation *producer) { + return shouldCloneProducer(producer, op.getRegion()); + }); + + rewriter.setInsertionPoint(op); + + if (!useNonDPSCallConv) { + InlineClosedGroupOp closedGroupOp = rewriter.create( + op.getLoc(), /*target=*/op.getTarget(), + /*inputs=*/inputs, + /*outs=*/ + llvm::map_to_vector(destinationOperands, [](const auto &x) { + return x.destinationOperand; + })); + + rewriter.inlineBlockBefore( + &op.getRegion().front(), &closedGroupOp.getRegion().front(), + closedGroupOp.getRegion().front().end(), + closedGroupOp.getRegion().getArguments().take_front( + op.getRegion().getNumArguments())); + + SmallVector replacements; + replacements.reserve(destinationOperands.size()); + for (auto [newResult, destOperand, originalType] : + llvm::zip(closedGroupOp->getResults(), destinationOperands, + op->getResultTypes())) { + if (destOperand.strategy == + DestinationOperandMaterializationStrategy::ExactShape) { + replacements.push_back(newResult); + continue; + } + + assert(destOperand.exactShape && + "expected materialized shape values to be provided for " + "this materialization strategy"); + + replacements.push_back(newResult); + } + + // Since we are about to replace values that may be inputs to other regions + // ops, we need to update the solver to populate the replacement TensorKind + // information. 
+ remapAnalysisState(solver, op->getResults(), replacements); + rewriter.replaceOp(op, replacements); + + // Create the closed region result profile attrs. + SmallVector resultAttrs; + for (const DestinationOperandMaterializationResult &dest : + destinationOperands) { + auto boundsAttr = BoundsAttr::getChecked( + mlir::detail::getDefaultDiagnosticEmitFn(loc), rewriter.getContext(), + BoundsKind::Shape, ArrayRef(dest.constantShapeLowerBound), + ArrayRef(dest.constantShapeUpperBound)); + if (!boundsAttr) + return failure(); + resultAttrs.push_back(boundsAttr); + } + closedGroupOp.setResAttrsAttr(resultAttrs); + + // Create the closed region input profile attrs. + auto inputAttr = getInputAttributes(rewriter, solver, closedGroupOp->getLoc(), closedGroupOp.getInputs()); + if (failed(inputAttr)) + return emitError(closedGroupOp.getLoc()) + << "failed to compute input attribute "; + + closedGroupOp.setInputAttrsAttr(*inputAttr); + + } else { + // Create a new closed group op and move blocks into it. + InlineClosedGroupNonDPSOp closedGroupOp = + rewriter.create( + op.getLoc(), /*result type*/ op->getResultTypes(), + /*target=*/op.getTarget(), + /*inputs=*/inputs); + + rewriter.inlineBlockBefore( + &op.getRegion().front(), &closedGroupOp.getRegion().front(), + closedGroupOp.getRegion().front().end(), + closedGroupOp.getRegion().getArguments().take_front( + op.getRegion().getNumArguments())); + + rewriter.replaceOp(op, closedGroupOp->getResults()); + + // Create the closed region input profile attrs.
+ auto inputAttr = getInputAttributes(rewriter, solver, closedGroupOp->getLoc(), closedGroupOp.getInputs()); + if (failed(inputAttr)) + return emitError(closedGroupOp.getLoc()) + << "failed to compute input attribute "; + + closedGroupOp.setInputAttrsAttr(*inputAttr); + + } return success(); } @@ -549,7 +595,7 @@ class CreateClosedRegionsPass IRRewriter rewriter(ctx); for (InlineGroupOp groupOp : groupOps) { - if (failed(createClosedGroupOp(rewriter, groupOp, solver))) + if (failed(createClosedGroupOp(rewriter, groupOp, solver, useNonDPSCallConv))) return signalPassFailure(); } } diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/OutlineClusters.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/OutlineClusters.cpp index 8a66f2830..f71fb3ff7 100644 --- a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/OutlineClusters.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/OutlineClusters.cpp @@ -106,6 +106,8 @@ static ClusterKindAttrInterface getClusterTargetForRegionOp(Operation *op) { return cast(regionOp.getTarget()); if (auto regionOp = dyn_cast(op)) return cast(regionOp.getTarget()); + if (auto regionOp = dyn_cast(op)) + return cast(regionOp.getTarget()); llvm_unreachable("unknown cluster region op kind"); } @@ -264,6 +266,86 @@ static LogicalResult outlineTensorRTRegion(RewriterBase &rewriter, return success(); } +static LogicalResult outlineTensorRTRegion(RewriterBase &rewriter, + plan::InlineClosedGroupNonDPSOp op) { + tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(op); + auto funcArgTypes = llvm::to_vector(TypeRange(op.getInputs())); + FailureOr func = createOutlinedFunc( + rewriter, op.getLoc(), op, trtModuleOp, "tensorrt_cluster", + "cluster.tensorrt", TypeRange(op.getInputs()), + op.getYield()->getOperandTypes()); + if (failed(func)) + return failure(); + assert(func->getFunctionBody().getBlocks().size() == 1 && + "expected body with one block"); + func->setPublic(); + + rewriter.setInsertionPoint(op); + 
+ auto callOp = rewriter.create( + op.getLoc(), op.getResultTypes(), op.getInputs(), + SymbolRefAttr::get(trtModuleOp.getNameAttr(), + {FlatSymbolRefAttr::get(*func)})); + + // Populate the function arguments attributes. + for (unsigned i = 0; i < (*func).getNumArguments(); i++) { + BoundsAttr srcAttr = cast(op.getInputAttrs()[i]); + // We may have scalar (index|signless int)-typed values since we haven't + // eliminated `plan.(with_shape|with_values)` ops yet. + if (!op.argHasTensorType(i) || srcAttr.isNone()) + continue; + FailureOr boundAttr = + getTensorRTShapeProfile(srcAttr, op.getInputs()[i]); + if (failed(boundAttr)) + return op->emitOpError("failed to create TensorRT shape profile " + "attribute from Plan BoundsAttr for argument #") + << i << " (" << srcAttr << ")"; + if (srcAttr.isShapeBound()) { + func->setArgAttr(i, + tensorrt::TensorRTDialect::getShapeProfileArgAttrName(), + *boundAttr); + continue; + } + assert(srcAttr.isValueBound() && "expected value bound or shape bound"); + func->setArgAttr( + i, tensorrt::TensorRTDialect::getShapeTensorValueBoundsArgAttrName(), + *boundAttr); + func->setArgAttr(i, mlir::getHostTensorArgAttrName(), + rewriter.getUnitAttr()); + } + + // Populate the function entry block. + rewriter.eraseBlock(&func->getFunctionBody().front()); + + // Move private decomposition funcs associated with all `stablehlo.composite` + // ops to the `tensorrt.module` op. This is needed since `tensorrt.module` op + // has its own symbol table. 
+ SymbolTableCollection symbolTable; + for (auto compositeOp : op.getBody().getOps()) { + auto decompositionFunc = dyn_cast_if_present( + symbolTable.lookupSymbolIn(op->getParentOfType(), + compositeOp.getDecompositionAttr())); + if (!decompositionFunc) + return emitError(compositeOp.getLoc()) + << "failed to lookup stablehlo.composite decomposition " + "function: " + << compositeOp.getDecompositionAttr(); + rewriter.moveOpAfter(decompositionFunc, func->getOperation()); + } + + // Move region op operations to the func body. + Operation *regionYieldOp = op.getYield(); + rewriter.inlineRegionBefore(op.getRegion(), func->getFunctionBody(), + func->getFunctionBody().end()); + rewriter.setInsertionPoint(regionYieldOp); + rewriter.replaceOpWithNewOp(regionYieldOp, + regionYieldOp->getOperands()); + + // replace the original region results. + rewriter.replaceOp(op, callOp); + return success(); +} + /// Create outlined functions for each `scf.execute_region` operation within /// `region`. static FailureOr> @@ -272,7 +354,7 @@ createFunctionsFromRegions(RewriterBase &rewriter, Region ®ion, SmallVector outlinedFuncs; WalkResult result = region.walk([&](Operation *op) { - if (!isa(op)) + if (!isa(op)) return WalkResult::advance(); if (!isa(getClusterTargetForRegionOp(op))) { @@ -292,8 +374,12 @@ createFunctionsFromRegions(RewriterBase &rewriter, Region ®ion, return WalkResult::advance(); } - if (auto dpsGroup = dyn_cast(op)) { - if (failed(outlineTensorRTRegion(rewriter, dpsGroup))) + if (auto group = dyn_cast(op)) { + if (failed(outlineTensorRTRegion(rewriter, group))) + return WalkResult::interrupt(); + return WalkResult::advance(); + } else if (auto nonDPSGroup = dyn_cast(op)) { + if (failed(outlineTensorRTRegion(rewriter, nonDPSGroup))) return WalkResult::interrupt(); return WalkResult::advance(); } diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/Passes.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/Passes.cpp index ab476b754..f081ab07b 100644 
--- a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/Passes.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/Passes.cpp @@ -44,16 +44,19 @@ void plan::buildPlanSegmentationPipeline( pm.addNestedPass( plan::createPlanPopulateFunctionBoundsAttributesPass()); pm.addPass(plan::createStablehloClusteringPass(opts)); - pm.addPass(plan::createCreateClosedRegionsPass()); + plan::CreateClosedRegionsPassOptions closedRegionOptions{}; + closedRegionOptions.useNonDPSCallConv = opts.useNonDPSCallConv; + pm.addPass(plan::createCreateClosedRegionsPass(closedRegionOptions)); pm.addPass(plan::createOutlineClustersPass()); pm.addPass(mlir::createFuncExtDuplicateFunctionEliminationPass()); pm.addPass(plan::createEliminateShapeOpsPass()); } -void plan::buildPlanBufferizationPipeline(OpPassManager &pm) { +void plan::buildPlanBufferizationPipeline( + OpPassManager &pm, const plan::PlanAllocTensorsPassOptions& opts) { pm.addPass(createInlinerPass()); pm.addPass(bufferization::createEmptyTensorEliminationPass()); - pm.addPass(plan::createPlanAllocTensorsPass()); + pm.addPass(plan::createPlanAllocTensorsPass(opts)); pm.addPass(plan::createPlanBufferizePass()); pm.addPass(mlir::createMemRefCastEliminationPass()); pm.addPass(bufferization::createDropEquivalentBufferResultsPass()); @@ -73,6 +76,10 @@ struct ClusteringPipelineCliOpts : public PassPipelineOptions { Option entrypoint{*this, "entrypoint", llvm::cl::init(""), llvm::cl::desc("name of entrypoint function")}; + Option useNonDPSCallConv{ + *this, "use-non-dps-call-conv", + llvm::cl::desc("allow tensorrt based output allocations using output allocator"), + llvm::cl::init(false)}; Option disallowHostTensorsInTensorRTClusters{ *this, "disallow-host-tensors-in-tensorrt-clusters", llvm::cl::desc("don't allow host tensor inputs to tensorrt clusters"), @@ -82,16 +89,28 @@ struct ClusteringPipelineCliOpts llvm::cl::desc("target TensorRT version for segmentation pipeline"), llvm::cl::init(10)}; }; + +struct 
PlanBufferizationPipelineCliOpts + : public PassPipelineOptions { + Option useNonDPSCallConv{ + *this, "use-non-dps-call-conv", + llvm::cl::desc( + "allow tensorrt based output allocations using output allocator"), + llvm::cl::init(false)}; +}; + } // namespace // Register pipelines. void plan::registerPlanDialectPipelines() { - PassPipelineRegistration<> executorBufferizationPipeline( + PassPipelineRegistration executorBufferizationPipeline( "plan-bufferize-pipeline", "perform bufferization and standard pre/post processing passes", - [](OpPassManager &pm) { - buildPlanBufferizationPipeline(pm); + [](OpPassManager &pm, const PlanBufferizationPipelineCliOpts &opts) { + PlanAllocTensorsPassOptions allocTensorOpts{}; + allocTensorOpts.useNonDPSCallConv = opts.useNonDPSCallConv; + buildPlanBufferizationPipeline(pm, allocTensorOpts); buildPlanBufferOptimizationPipeline(pm); }); @@ -104,6 +123,7 @@ void plan::registerPlanDialectPipelines() { clusterOpts.disallowHostTensorsInTensorRTClusters = opts.disallowHostTensorsInTensorRTClusters; clusterOpts.entrypoint = opts.entrypoint; + clusterOpts.useNonDPSCallConv = opts.useNonDPSCallConv; buildPlanSegmentationPipeline(pm, clusterOpts); }); } diff --git a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/StablehloClustering.cpp b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/StablehloClustering.cpp index b160ea72a..5413c0b9c 100644 --- a/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/StablehloClustering.cpp +++ b/mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/StablehloClustering.cpp @@ -285,7 +285,7 @@ class StablehloClusteringPass for (func::FuncOp func : funcs) { if (failed(applyClusteringToFunc( rewriter, func, solver, schedule, - StablehloClusteringPassOptions{entrypoint, false, false, + StablehloClusteringPassOptions{entrypoint, useNonDPSCallConv, false, false, trtMajorVersion}))) return signalPassFailure(); } diff --git a/mlir-tensorrt/executor/lib/Conversion/MemRefToExecutor.cpp 
b/mlir-tensorrt/executor/lib/Conversion/MemRefToExecutor.cpp index 90e4c9681..04c14c673 100644 --- a/mlir-tensorrt/executor/lib/Conversion/MemRefToExecutor.cpp +++ b/mlir-tensorrt/executor/lib/Conversion/MemRefToExecutor.cpp @@ -24,11 +24,13 @@ #include "mlir-executor/Conversion/ConvertToExecutorCommon.h" #include "mlir-executor/Conversion/Passes.h" #include "mlir-executor/Executor/IR/Executor.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/MathExtras.h" @@ -548,6 +550,43 @@ void executor::populateMemRefToExecutorPatterns( } namespace { + +class RemoveNoOpClonePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(bufferization::CloneOp op, + PatternRewriter &rewriter) const override { + if (op.getInput().getType() == op.getOutput().getType()) { + rewriter.replaceOp(op, op.getInput()); + return success(); + } + return failure(); + } +}; + +class RemoveRedundantCastsPattern + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(UnrealizedConversionCastOp op, + PatternRewriter &rewriter) const override { + // Check if this cast is immediately followed by another cast that reverses + // it + if (auto nextCast = + dyn_cast_or_null(op->getNextNode())) { + if (nextCast.getInputs() == op.getResults() && + nextCast.getResultTypes() == op.getOperandTypes()) { + rewriter.replaceOp(nextCast, op.getOperands()); + rewriter.eraseOp(op); + return success(); + } + } + return failure(); + } +}; + /// Pass to convert `memref` to `executor` dialect operrations. 
class ConvertMemRefToExecutorPass : public mlir::executor::impl::ConvertMemRefToExecutorPassBase< @@ -579,9 +618,20 @@ class ConvertMemRefToExecutorPass RewritePatternSet patterns(ctx); executor::populateMemRefToExecutorPatterns( patterns, typeConverter, allowUncheckedMemrefCastConversion); - if (failed(applyPartialConversion(getOperation(), target, - std::move(patterns)))) + + // Add the new patterns + patterns.add(ctx); + + // Apply the patterns in multiple phases + if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) + return signalPassFailure(); + + // Run a final cleanup to remove any remaining unrealized casts + RewritePatternSet cleanupPatterns(ctx); + cleanupPatterns.add(ctx); + if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(cleanupPatterns)))) return signalPassFailure(); + } }; } // namespace diff --git a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp index b01e98477..52bb5b1d7 100644 --- a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp +++ b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp @@ -334,9 +334,6 @@ prepareBuffers(const AllocTracker &allocTracker, for (int64_t dimIdx = 0; dimIdx < rank; dimIdx++) dims.d[dimIdx] = va.get(argumentBuffersIdx++); - // Skip the over the stride information. 
- argumentBuffersIdx += rank; - uintptr_t pointer = buffer.ptr + offset; MTRT_DBGF("enqueue arg %u ptr=0x%lx offset=%ld", i, buffer.ptr, offset); diff --git a/mlir-tensorrt/executor/test/lib/BufferizationTestPass.cpp b/mlir-tensorrt/executor/test/lib/BufferizationTestPass.cpp index f0c3798d7..dc3b2ea70 100644 --- a/mlir-tensorrt/executor/test/lib/BufferizationTestPass.cpp +++ b/mlir-tensorrt/executor/test/lib/BufferizationTestPass.cpp @@ -52,16 +52,26 @@ class ExecutorBufferizationTestPass } } }; + +struct PlanBufferizationPipelineCliOpts + : public PassPipelineOptions { + Option useNonDPSCallConv{ + *this, "use-non-dps-call-conv", + llvm::cl::desc( + "allow tensorrt based output allocations using output allocator"), + llvm::cl::init(false)}; +}; + } // namespace namespace mlir::executor { void registerTestExecutorBufferizePass() { PassRegistration(); - PassPipelineRegistration<> executorBufferizationPipeline( + PassPipelineRegistration executorBufferizationPipeline( "test-executor-bufferization-pipeline", "Run one-shot-bufferization and buffer deallocation pipelines", - [](OpPassManager &pm) { + [](OpPassManager &pm, const PlanBufferizationPipelineCliOpts &opts) { pm.addPass(createInlinerPass()); pm.addPass(std::make_unique()); bufferization::BufferDeallocationPipelineOptions deallocOptions{}; diff --git a/mlir-tensorrt/test/Conversion/TensorRTRuntimeToExecutor/tensorrt-runtime-to-executor.mlir b/mlir-tensorrt/test/Conversion/TensorRTRuntimeToExecutor/tensorrt-runtime-to-executor.mlir index b13b07af8..a483aa211 100644 --- a/mlir-tensorrt/test/Conversion/TensorRTRuntimeToExecutor/tensorrt-runtime-to-executor.mlir +++ b/mlir-tensorrt/test/Conversion/TensorRTRuntimeToExecutor/tensorrt-runtime-to-executor.mlir @@ -69,7 +69,7 @@ func.func @main(%arg0: memref<1x3x256x256xf32, #executor.memory_type>) - return %2 : memref> } -// CHECK-LABEL: module attributes +// CHECK-LABEL: module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, #dlti.dl_entry, 64 : i64>, 
#dlti.dl_entry, 64 : i64>>} { // CHECK-DAG: executor.func private @_trtrt_alloc_enqueue(!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, ...) // CHECK-DAG: executor.func private @_trtrt_create_runtime() -> !executor.opaque<"trtrt_runtime"> // CHECK-DAG: executor.func private @_trtrt_create_context(!executor.opaque<"trtrt_engine">) -> !executor.opaque<"trtrt_context"> @@ -92,38 +92,45 @@ func.func @main(%arg0: memref<1x3x256x256xf32, #executor.memory_type>) - // CHECK: } // CHECK-LABEL: func.func @main // CHECK-SAME: (%[[arg0:.*]]: memref<1x3x256x256xf32, #executor.memory_type>) -> memref> { -// CHECK-DAG: %[[c0:.*]] = executor.constant 0 : i64 // CHECK-DAG: %[[c256:.*]] = executor.constant 256 : i64 // CHECK-DAG: %[[c3:.*]] = executor.constant 3 : i64 // CHECK-DAG: %[[c4:.*]] = executor.constant 4 : i64 +// CHECK-DAG: %[[c0:.*]] = executor.constant 0 : i64 // CHECK-DAG: %[[c1:.*]] = executor.constant 1 : i64 // CHECK: %[[v6:.*]] = builtin.unrealized_conversion_cast %[[arg0]] : memref<1x3x256x256xf32, #executor.memory_type> to !executor.table, !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> // CHECK: %[[v7:.*]] = executor.get_global @my_func_exec_ctx : !executor.opaque<"trtrt_context"> // CHECK: %[[v8:.*]] = cuda.stream.create : !cuda.stream // CHECK: %[[v9:.*]] = builtin.unrealized_conversion_cast %[[v8]] : !cuda.stream to !executor.ptr // CHECK: %[[v10:.*]] = executor.alloca %[[c1]] x !executor.table : (i64) -> !executor.ptr -// CHECK-DAG: %[[v11:.*]] = executor.getoffset[0, 0] : () -> i64, !executor.table -// CHECK-DAG: %[[v12:.*]] = executor.getoffset[0, 1] : () -> i64, !executor.table +// CHECK: %[[v11:.*]] = executor.getoffset[0, 0] : () -> i64, !executor.table +// CHECK: executor.store %[[c1]] to %[[v10]] + %[[v11]] : i64, !executor.ptr, i64 +// CHECK: %[[v12:.*]] = executor.getoffset[0, 1] : () -> i64, !executor.table // CHECK: executor.store %[[c4]] to %[[v10]] + %[[v12]] : i64, !executor.ptr, i64 // CHECK: %[[v13:.*]] = 
executor.table.get %[[v6]][1] : , !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> // CHECK: %[[v14:.*]] = executor.table.create(%[[v13]], %[[c0]], %[[c4]], %[[c1]], %[[c3]], %[[c256]], %[[c256]] : !executor.ptr, i64, i64, i64, i64, i64, i64) : , i64, i64, i64, i64, i64, i64> // CHECK: executor.call @_trtrt_alloc_enqueue(%[[v7]], %[[v9]], %[[v10]], %[[v14]]) : (!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, !executor.table, i64, i64, i64, i64, i64, i64>) -> () -// CHECK-DAG: %[[v15:.*]] = executor.getoffset[0, 4] : () -> i64, !executor.table -// CHECK: %[[v16:.*]] = executor.load %[[v4:.*]] + %[[v15]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v17:.*]] = executor.getoffset[0, 5] : () -> i64, !executor.table -// CHECK: %[[v18:.*]] = executor.load %[[v4]] + %[[v17]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v19:.*]] = executor.getoffset[0, 6] : () -> i64, !executor.table -// CHECK: %[[v20:.*]] = executor.load %[[v4]] + %[[v19]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v21:.*]] = executor.getoffset[0, 7] : () -> i64, !executor.table -// CHECK: %[[v22:.*]] = executor.load %[[v4]] + %[[v21]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v23:.*]] = executor.getoffset[0, 8] : () -> i64, !executor.table -// CHECK: %[[v24:.*]] = executor.load %[[v4]] + %[[v23]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v25:.*]] = executor.getoffset[0, 9] : () -> i64, !executor.table -// CHECK: %[[v26:.*]] = executor.load %[[v4]] + %[[v25]] : (!executor.ptr, i64) -> i64 -// CHECK-DAG: %[[v27:.*]] = executor.getoffset[0, 10] : () -> i64, !executor.table -// CHECK: %[[v28:.*]] = executor.load %[[v4]] + %[[v27]] : (!executor.ptr, i64) -> i64 -// CHECK: %[[v29:.*]] = executor.table.create : , !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> -// CHECK: %[[v30:.*]] = builtin.unrealized_conversion_cast %[[v29]] : !executor.table, !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> to memref> +// CHECK: %[[v15:.*]] = 
executor.getoffset[0, 2] : () -> i64, !executor.table +// CHECK: %[[v16:.*]] = executor.load %[[v10]] + %[[v12]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v17:.*]] = executor.load %[[v10]] + %[[v15]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v18:.*]] = executor.inttoptr %[[v17]] : (i64) -> !executor.ptr +// CHECK: %[[v19:.*]] = executor.getoffset[0, 3] : () -> i64, !executor.table +// CHECK: %[[v20:.*]] = executor.load %[[v10]] + %[[v19]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v21:.*]] = executor.getoffset[0, 4] : () -> i64, !executor.table +// CHECK: %[[v22:.*]] = executor.load %[[v10]] + %[[v21]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v23:.*]] = executor.getoffset[0, 5] : () -> i64, !executor.table +// CHECK: %[[v24:.*]] = executor.load %[[v10]] + %[[v23]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v25:.*]] = executor.getoffset[0, 6] : () -> i64, !executor.table +// CHECK: %[[v26:.*]] = executor.load %[[v10]] + %[[v25]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v27:.*]] = executor.getoffset[0, 7] : () -> i64, !executor.table +// CHECK: %[[v28:.*]] = executor.load %[[v10]] + %[[v27]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v29:.*]] = executor.getoffset[0, 8] : () -> i64, !executor.table +// CHECK: %[[v30:.*]] = executor.load %[[v10]] + %[[v29]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v31:.*]] = executor.getoffset[0, 9] : () -> i64, !executor.table +// CHECK: %[[v32:.*]] = executor.load %[[v10]] + %[[v31]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v33:.*]] = executor.getoffset[0, 10] : () -> i64, !executor.table +// CHECK: %[[v34:.*]] = executor.load %[[v10]] + %[[v33]] : (!executor.ptr, i64) -> i64 +// CHECK: %[[v35:.*]] = executor.table.create(%[[v18]], %[[v18]], %[[c0]], %[[v20]], %[[v22]], %[[v24]], %[[v26]], %[[v28]], %[[v30]], %[[v32]], %[[v34]] : !executor.ptr, !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64) : , !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> +// CHECK: %[[v36:.*]] = 
builtin.unrealized_conversion_cast %[[v35]] : !executor.table, !executor.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64> to memref> // CHECK: cuda.stream.sync %[[v8]] : !cuda.stream -// CHECK: return %[[v30]] : memref> -// CHECK: } +// CHECK: return %[[v36]] : memref> +// CHECK: } \ No newline at end of file diff --git a/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_add.py b/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_add.py index d81bedeff..7fc3ffdf9 100644 --- a/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_add.py +++ b/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_add.py @@ -14,17 +14,17 @@ """ -def stablehlo_add(): +def stablehlo_add(use_non_dps=False): # Build/parse the main function. with ir.Context() as context: m = ir.Module.parse(ASM) # Use the compiler API to compile to executable. client = compiler.CompilerClient(context) - opts = compiler.StableHLOToExecutableOptions( - client, - ["--tensorrt-builder-opt-level=3", "--tensorrt-strongly-typed=false"], - ) + c_opts = ["--tensorrt-builder-opt-level=3", "--tensorrt-strongly-typed=false"] + if use_non_dps: + c_opts.append("--use-non-dps-call-conv") + opts = compiler.StableHLOToExecutableOptions(client, c_opts) exe = compiler.compiler_stablehlo_to_executable(client, m.operation, opts) # The RuntimeClient can and should persist across multiple Executables, RuntimeSessions, etc. 
@@ -44,40 +44,53 @@ def stablehlo_add(): device=devices[0], stream=stream, ) - arg1 = client.create_memref( - np.zeros(shape=(2, 3, 4), dtype=np.float32).data, - device=devices[0], - stream=stream, - ) - session.execute_function( - "main", in_args=[arg0], out_args=[arg1], stream=stream, client=client - ) - data = np.asarray(client.copy_to_host(arg1, stream=stream)) + result = None + if use_non_dps: + results = session.execute_function( + "main", in_args=[arg0], stream=stream, client=client + ) + result = results[0] + else: + result = client.create_memref( + np.zeros(shape=(2, 3, 4), dtype=np.float32).data, + device=devices[0], + stream=stream, + ) + session.execute_function( + "main", in_args=[arg0], out_args=[result], stream=stream, client=client + ) + + data = np.asarray(client.copy_to_host(result, stream=stream)) stream.sync() print(data) - # Run execution a bunch more times asynchronously so that it calculates - # `x * 2**num_iter`. - num_iter = 5 - start_time = time.time() - for _ in range(0, num_iter): - session.execute_function( - "main", in_args=[arg0], out_args=[arg0], stream=stream, client=client - ) - data = np.asarray(client.copy_to_host(arg1, stream=stream)) - stream.sync() - end_time = time.time() - elapsed = end_time - start_time + if not use_non_dps: + # Run execution a bunch more times asynchronously so that it calculates + # `x * 2**num_iter`. 
+ num_iter = 5 + start_time = time.time() + for _ in range(0, num_iter): + session.execute_function( + "main", in_args=[arg0], out_args=[arg0], stream=stream, client=client + ) + data = np.asarray(client.copy_to_host(arg0, stream=stream)) + stream.sync() + end_time = time.time() + elapsed = end_time - start_time - print(np.asarray(client.copy_to_host(arg0))) - print(f"1000 iterations avg { (elapsed/num_iter)/1000.0} msec per iteration") + print(np.asarray(client.copy_to_host(arg0))) + print(f"1000 iterations avg { (elapsed/num_iter)/1000.0} msec per iteration") if __name__ == "__main__": + print("DPS style execution:") stablehlo_add() + print("Non DPS style execution:") + stablehlo_add(use_non_dps=True) +# CHECK-LABEL: DPS style execution: # CHECK: [ 0. 2. 4. 6.] # CHECK-NEXT: [ 8. 10. 12. 14.] # CHECK-NEXT: [16. 18. 20. 22.]] @@ -92,3 +105,11 @@ def stablehlo_add(): # CHECK-NEXT: [384. 416. 448. 480.] # CHECK-NEXT: [512. 544. 576. 608.] # CHECK-NEXT: [640. 672. 704. 736.] +# CHECK-LABEL: DPS style execution: +# CHECK: [ 0. 2. 4. 6.] +# CHECK-NEXT: [ 8. 10. 12. 14.] +# CHECK-NEXT: [16. 18. 20. 22.]] +# CHECK-NEXT: +# CHECK-NEXT: [24. 26. 28. 30.] +# CHECK-NEXT: [32. 34. 36. 38.] +# CHECK-NEXT: [40. 42. 44. 
46.]]] diff --git a/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py b/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py deleted file mode 100644 index 5b0b6c4ff..000000000 --- a/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py +++ /dev/null @@ -1,124 +0,0 @@ -# RUN: %PYTHON %s -import time - -import mlir_tensorrt.compiler.api as compiler -import mlir_tensorrt.compiler.ir as ir -import mlir_tensorrt.runtime.api as runtime -import numpy as np - -ASM = """ -func.func @main(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> { - %1 = stablehlo.add %arg0, %arg1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32> - func.return %1 : tensor<1xf32> -} -""" - -EXECUTOR = """ -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, #dlti.dl_entry, 64 : i64>, #dlti.dl_entry, 64 : i64>>, executor.global_init_func = @executor_init_globals, executor.process_grid_shape = array} { - executor.func private @_dealloc(...) - executor.func private @_inttoptr_i64_i64(i64) -> !executor.ptr - executor.func private @_load_i64(...) -> i64 - executor.func private @_store_i64(...) - executor.func private @executor_alloc(i64, i64) -> !executor.ptr - executor.func private @__cuda_stream_create() -> !executor.ptr - executor.global @stream0 constant : !executor.ptr - executor.func private @_trtrt_enqueue_alloc(!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, ...) 
- executor.func private @_trtrt_create_runtime() -> !executor.opaque<"trtrt_runtime"> - executor.func private @_trtrt_create_context(!executor.opaque<"trtrt_engine">) -> !executor.opaque<"trtrt_context"> - executor.func private @_trtrt_load(!executor.opaque<"trtrt_runtime">, !executor.ptr, i64) -> !executor.opaque<"trtrt_engine"> - executor.global @tensorrt_runtime : !executor.opaque<"trtrt_runtime"> - executor.constant_resource @tensorrt_cluster_engine_data dense<"0x66747274000000000100000000000000D4410000000000000A040A02727472744E474E4500000000380000000000000092410000000000007074727400000000EF000000000000007A41000000000000028001000A0280010004028001000A0280010002D2800100078001000000000012800100108001000780010008000000078001000900000007800100F8C3250007800100883BA00007800100C00000000880010000000003000000000880010000001002000000000780010000C000000780010000900100078001000002000007800100380000000A80010000078001000004000007800100FFFFFF7F07800100FFFF000007800100FFFF000007800100000400000980010000000DE702000000EF7FED7F0A800100009E800100098001000000000000000000617FE98001000980010000000000000000000980010000000000000000000780010000000000098001000000000000000000098001000000000000000000167FE880010007800100000000000A800100000A800100010A8001000031800100078001000000000009800100000000000000000009800100010000000000000018800100110000000000000001800C8001001100000000000000556E6E616D6564204E6574776F726B203001800880010000000000000000000680010000000000018001800100010000000000000001800880010000000000000000000880010003000000000000009F8001002680010007800100000000009D80010018800100070000000000000001800C8001000700000000000000726573756C74303780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F2480010007800100000000005380010007800100010000000680010002000000158001
000780010000000000098001000100000000000000627F607F9F8001002680010007800100000000009D80010018800100040000000000000001800C8001000400000000000000617267313780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F2480010007800100000000005380010007800100000000000680010001000000158001000780010000000000098001000000000000000000627F607F9F8001002680010007800100000000009D80010018800100040000000000000001800C8001000400000000000000617267303780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F2480010007800100000000005380010007800100000000000680010000000000158001000780010000000000098001000000000000000000627F607F098001000100000000000000CC800100CB800100078001000000000049800100088001000100000000000000078001005900000001800280010098320000000000007F454C460201013307000000000000000200BE007E0000000000000000000000B831000000000000782D00000000000059055900400038000400400011000100002E7368737472746162002E737472746162002E73796D746162002E73796D7461625F73686E6478002E6E762E696E666F002E746578742E67656E6572617465644E6174697665506F696E7477697365002E6E762E696E666F2E67656E6572617465644E6174697665506F696E7477697365002E6E762E7368617265642E67656E6572617465644E6174697665506F696E7477697365002E6E762E676C6F62616C2E696E6974002E6E762E636F6E7374616E7434002E6E762E676C6F62616C002E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E72656C2E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E64656275675F6672616D65002E72656C2E6E762E636F6E7374616E7434002E72656C2E64656275675F6672616D65002E72656C612E6465627567
5F6672616D65002E6E762E63616C6C6772617068002E6E762E70726F746F74797065002E6E762E72656C2E616374696F6E00002E7368737472746162002E737472746162002E73796D746162002E73796D7461625F73686E6478002E6E762E696E666F002E746578742E67656E6572617465644E6174697665506F696E7477697365002E6E762E696E666F2E67656E6572617465644E6174697665506F696E7477697365002E6E762E7368617265642E67656E6572617465644E6174697665506F696E7477697365002E6E762E676C6F62616C2E696E6974005F5A4E537439747275655F747970653576616C756545002E6E762E636F6E7374616E7434002E6E762E676C6F62616C005F5A4E5374313066616C73655F747970653576616C756545005F5A4E35707767656E364B417272617949664A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949694A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E535F3654656E736F72494C6931454E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373753726343566563454C4E535F36494F5479706545304545454A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E535F3654656E736F72494C6931454E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373744737443566563454C4E535F36494F5479706545314545454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949694A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B41727261794950664A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B41727261794950664A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E
36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6938454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E53305F494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6938454545454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6938454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E53305F494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6938454545454A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949664A4C6932454545386B4E425F44494D5345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137386E62496E7075747345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137396E624F75747075747345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137366E6244696D7345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C
5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373769647844696D4345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137386356656353697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731317375624356656353697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316E624356656353706C697445005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313735697350435145005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316861735372635363616C6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316861734473745363616C6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F
474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373763746153697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731326E62496E6E6572497465727345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731326E624F75746572497465727345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137376E62497465727345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373767727053697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731306772707350657243746145005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731307365677350657247727045005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32
3233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373131635665637350657247727045005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313767727053697A6547744356656353697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313669734C6173744356656350616464656445005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731346D61784E6174697665416C69676E45002E72656C2E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E64656275675F6672616D65002E72656C2E6E762E636F6E7374616E7434002E72656C2E64656275675F6672616D65002E72656C612E64656275675F6672616D65002E6E762E63616C6C6772617068002E6E762E70726F746F74797065002E6E762E72656C2E616374696F6E0067656E6572617465644E6174697665506F696E7477697365000000000000000000000000000000000000000000000000000000003200000003000D00000000000000000000000000000000007300000003000F00000000000000000000000000000000009700000003000E0000000000000000000000000000000000A700000001000E0074000000000000000100000000000000BE00000003000B0000000000000000000000000000000000CC0000000300100000000000000000000000000000000000D70000000100100004000000000000000100000000000000F000000001000E00440000000000000004000000000000001401000001000E00080000000000000004000000000000008601000001
000E0060000000000000000400000000000000F801000001000E00340000000000000004000000000000001C02000001000E0050000000000000000400000000000000A902000001000E00180000000000000004000000000000003603000001000E00680000000000000004000000000000005A03000001000E00200000000000000004000000000000007F03000001000E003C000000000000000400000000000000A403000001000E00000000000000000004000000000000001604000001000E00580000000000000004000000000000009504000001000E002C0000000000000004000000000000000705000001000E004C0000000000000004000000000000008605000001000E0010000000000000000400000000000000AA05000001000E00700000000000000004000000000000003806000001000E0028000000000000000400000000000000C706000001000E0048000000000000000400000000000000530700000100100000000000000000000400000000000000E007000001000E000C0000000000000004000000000000006E08000001000E00640000000000000004000000000000000009000001000E00380000000000000004000000000000009209000001001000060000000000000001000000000000001D0A00000100100008000000000000000100000000000000AF0A00000100100005000000000000000100000000000000410B000001000E0054000000000000000400000000000000CE0B000001000E001C000000000000000400000000000000610C000001000E006C000000000000000400000000000000F40C000001000E0024000000000000000400000000000000810D000001000E00400000000000000004000000000000000E0E000001000E00040000000000000004000000000000009F0E000001000E005C000000000000000400000000000000300F000001000E0030000000000000000400000000000000C20F000001000E00750000000000000001000000000000005A1000000100100007000000000000000100000000000000F110000001000E0014000000000000000400000000000000B111000003000C0000000000000000000000000000000000D811000003000400000000000000000000000000000000001A12000003000700000000000000000000000000000000003612000003000800000000000000000000000000000000004512000012100D000000000000000000800D000000000000FFFFFFFF2400000000000000FFFFFFFFFFFFFFFF0300047CFFFFFFFF0F0C818080280008FF8180280881808028000000FFFFFFFF340000000000000000000000000000000000000000000000800D000000000000040400000004400000
000C818080280004F002000000000000000000042F08002F00000028000000041108002F00000000000000041208002F00000000000000043704007E000000040A08002B000000600158000319580004170C00000000000500400000F0610004170C00000000000400100000F0C10004170C000000000003000C0000F0110004170C00000000000200080000F0110004170C00000000000100040000F0110004170C00000000000000000000F01100031BFF00041C280000010000900B0000E00B0000000C0000200C0000400C0000600C0000800C0000B00C0000D00C0000041E04000000000000000000FFFFFFFF00000000FEFFFFFF00000000FDFFFFFF00000000FCFFFFFF00000000730000000000000000000011250005362001000000000000020000002A00000018010000000000000200000029000000100100000000000002000000280000000801000000000000020000002700000000010000000000000200000026000000F8000000000000000200000025000000F0000000000000000200000024000000E8000000000000000200000023000000E0000000000000000200000022000000D8000000000000000200000021000000D0000000000000000200000020000000C800000000000000020000001F000000C000000000000000020000001E000000B800000000000000020000001D000000B000000000000000020000001C000000A800000000000000020000001B000000A000000000000000020000001A000000980000000000000002000000190000009000000000000000020000001800000088000000000000000200000017000000800000000000000002000000160000007800000000000000020000001500000070000000000000000200000014000000680000000000000002000000130000006000000000000000020000001200000058000000000000000200000011000000500000000000000002000000100000004800000000000000020000000F0000004000000000000000020000000E0000003800000000000000020000000D0000003000000000000000020000000C0000002800000000000000020000000B0000002000000000000000020000000A000000180000000000000002000000090000001000000000000000020000000800000008000000000000000200000007000000000000000000000002000000040000004400000000000000020000002F00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000247601FF000A0000FF008E0700E40F0019790000000000000021000000280E0019790300000000000025000000620E000C720000FF0000007052F00300E21F00247800038000000000028E0700CA2F00197804FF050000000016010000CE0F000C8A00FF006000007052F20300E40F000C8A00FF006600007052F40300E40F00078802FF010000000000800000E40F00078803FF010000000000000100E40F000C7A0004005800007062F20300E20F00888300FF020000000000000000E80100888300FF020200000000000000E80100888300FF030100000000000000E80100888300FF030300000000000000E801001D7B0000000000000000010000EC0F004D190000000000000000800300EA0F00137A1200005900000000000000E21F00B97A040000460000000A000000E20F0013721300040000000000000000C40F0006731000120000000094200000
220E000C720004FF0000007062F40300E40F00127800001F000000FFC08E0700CA0F0008731000100000000010000000241E0010780210FEFFFF0FFFE0FF0700CC1F00057303000200000000F0210000640000247202FFFF000000FF008E0700E41F00247211FFFF000000030A8E0700C82F002472111112000000FF028E0700C80F00277203031100000002008E0700C80F00247211FFFF00000013008E0700C80F002772030311000000FF008E0700C80F00247203FFFF000000030A8E0700C80F00247203120300000011028E0700CA0F000C720012030000007040F00300DA0F002488030301000000120A8E0700E20F000C7A00FF005900007052F00300C80F000C720012030000007040F20300DA0F002498030301000000120A8E0700C80F0024A203FFFF000000030A8E0700E20F00128A03FF00590000FF338E0700C80F0016780003042100000000000000C80F001078020020000000FFE0FF0700E40F041078030040000000FFE0FF0700E40F000C7A0002005A00007062F00300E40F000C7A0003005A00007062F20300E40F001078100060000000FFE0FF0700C80F000C7A0010005A00007062F40300E40F001078100080000000FFE0FF0700C60F00848902FF000200000000000000220E000C7A0010005A00007062F60300E40F0010781000A0000000FFE0FF0700E20F00848903FF000300000000000000660E000C7A0010005A00007062F80300E20F00849912FF000200000000000000A80E00849913FF000300000000000000E80E0084A914FF000200000000000000280F0084A915FF000300000000000000280F0084B91AFF000200000000000000280F0084B91BFF000300000000000000220F000C720002FF00000070527C0400E21F00247A020000600000FF028E0700E20F040C720003FF00000070527A0400E22F00247A030000660000FF028E0700C60F001088110220000000FFE0FF0700E40F041098210240000000FFE0FF0700E40F001088100320000000FFE0FF0700E40F041098170340000000FFE0FF0700E20F00246211FFFF00000002028E0700E20F000C720012FF0000007052FC0400E44F0010A8250260000000FFE0FF0700E20F00245210FFFF00000003028E0700E20F000C720013FF0000007052FA0400C48F0084C913FF000200000000000000220E0010781200C0000000FFE0FF0700E40F0010A8160360000000FFE0FF0700E40F0010B8240280000000FFE0FF0700E40F00246221FFFF00000002028E0700E20F000C720014FF00000070527C0500E40F0184C914FF000300000000000000620E00245217FFFF00000003028E0700E20F000C720015FF00000070527A0500C40F0010B8200380000000FFE0FF0700E40F0410C81D02A0
000000FFE0FF0700E40F0010C81C03A0000000FFE0FF0700C60F00246225FFFF00000002028E0700E20F000C72001AFF0000007052FC0500C60F00245216FFFF00000003028E0700E20F000C7A0012005A00007062FA0300D20F00246224FFFF00000002028E0700E20F000C72001BFF0000007052FC0500C60F0084D912FF000200000000000000A20E0010D81F02C0000000FFE0FF0700E40F0010D81E03C0000000FFE0FF0700CE0F00246220FFFF00000003028E0700E20F000C720013FF00000070527C0600E41F0084D913FF000300000000000000360E0024621DFFFF00000002028E0700E20F000C720014FF00000070527C0600DA2F0024621CFFFF00000003028E0700E20F000C720012FF0000007052FC0600DA4F0010621F02FF000000FFE0FF0700E40F000C720013FF0000007052FC0600E21F00248415FF04000000FF008E0700D80F0024621EFFFF00000003028E0700E20F000C7A0000005A00007062FC0300E20F00248214FFFF00000010008E0700E40F0025861015005C000011028E0700E20F0003781BFF400000000000000000C60F00258614150062000014028E0700E20F00818906100400000000191E0C00680100818907140400000000191E0C0062030024E412FF04000000FF008E0700C80F0025E62202005C000012028E0700C80F0025E612030062000012028E0700E20F0081E904220400000000191E0C0066050024A414FF04000000FF008E0700E22F0081E905120400000000191E0C00620300249410FF04000000FF008E0700E21F0003781AFF400000000000000000E20F00249223FFFF00000021008E0700E44F0024B421FF04000000FF008E0700C40F0025A61214005C000025028E0700C82F0025962210005C000023028E0700E20F0081A90A120400000000191E0C0066010025A614140062000016028E0700E20F00819908220400000000191E0C00660300259610100062000017028E0700E20F0081A90B140400000000191E0C0066050025B61621005C000024028E0700E20F0002D8240004000000000F000000E20F00819909100400000000191E0C0064070024D223FFFF0000001F008E0700C42F0024D225FFFF0000001E008E0700E20F0081B90C160400000000191E0C0062030024C414FF04000000FF008E0700E44F0024C222FFFF0000001C008E0700E40F0025B61E210062000020028E0700C80F0025C61C14005C00001D028E0700E20F0081B90D1E0400000000191E0C0066050025D61224005C000023028E0700E21F0481C90E1C0400000000191E0C0066050025D610240062000025028E0700E28F0081D918120400000000191E0C0068050081D919100400000000191E0C0062050010781600E0000000FFE0FF0700
E22F0025C614140062000022028E0700C60F000C7A0016005A00007062FC0300E40F0081C90F140400000000191E0C00640100037814FF400000000000000000E41F000C72001AFF0000007052FC0300E20F0045790000000200000000800300D80F0047690000E00100000000800300EA0F00037816FF020000000000000000E44F000C720014FF0000007052F20300DA0F00849911FF000200000000000000220E0010981002E0000000FFE0FF0700E20F00249415FF04000000FF008E0700E20F000C720011FF0000007052FC0400E41F00849911FF000300000000000000360E00246210FFFF00000002028E0700E20F0010980203E0000000FFE0FF0700C40F000C720011FF0000007052FC0400DA1F00246202FFFF00000003028E0700C80F00249211FFFF00000002008E0700E40F0025960215005C000010028E0700C80F00259610150062000011028E0700E20F00819912020400000000191E0C0064010021721504050000000000010000E40F02819913100400000000191E0C006201000C720016FF0000007052F20300E20F0047090000B00000000000800300F60F0021721106070000000000010000E21F0047190000900000000000800300EA0F0021720908090000000000010000E20F0047290000700000000000800300EA0F0021720B0A0B0000000000010000E20F0047390000500000000000800300EA0F0021720D0C0D0000000000010000E20F0047490000300000000000800300EA0F0021D20518190000000000010000E20F0021D20712130000000000010000E20F0221720F0E0F0000000000010000C40F0041790000000000000000800300EA4F000C72001BFF0000007052FC0300DA0F004D690000000000000000800300EA0F00247403FF04000000FF008E0700E41F00247A0200006C0000FF028E0700C80F00257602020068000003028E0700CA0F0086790002150000000419100C00E201004D090000000000000000800300EA0F0086790002118000000419100C00E203004D190000000000000000800300EA0F0086790002090001000419100C00E205024D290000000000000000800300EA0F00867900020B8001000419100C00E207004D390000000000000000800300EA0F00867900020D0002000419100C00E209004D490000000000000000800300EA0F00867900020F8002000419100C00E201004D590000000000000000800300EA0F000C720014FF0000007052FC0300E20F0086790002050003000419100C00D801004D690000000000000000800300EA0F0086790002078003000419100C00E21F004D790000000000000000800300EA0F0047790000F0FFFFFFFFFF830300C00F0018790000000000000000000000C00F001879000000
0000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F000800000004000000010000000100000002000000100000000100000008000000020000000800000001000000080000000001000001000000010000000100000020000000010000000100000002000000020000008000000001000000200000000100000001000000020000000100000002000000010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000030000000000000000000000000000000000000040000000000000007F010000000000000000000000000000010000000000000000000000000000000B0000000300000000000000000000000000000000000000BF010000000000005E1200000000000000000000000000000100000000000000000000000000000013000000020000000000000000000000000000000000000020140000000000008004000000000000020000002F00000008000000000000001800000000000000120100000100000000000000000000000000000000000000A0180000000000007000000000000000000000000000000001000000000000000000000000000000290000000000007000000000000000000000000000000000101900000000000024000000000000000300000000000000040000000000000000000000000000005100000000000070400000000000000000000000000000003419000000000000B000000000000000030000000D00000004000000000000000000000000000000540100000100007000000000000000000000000000000000E4190000000000002000000000000000030000000000000004000000000000000800000000000000700100000B00007000000000000000000000000000000000081A00000000000010000000000000000000000000000000080000000000000008000000000000001F0100000900000040000000000000000000000000000000181A0000000000005002000000000000030000000B00000008000000000000001000000000000000310100000900000040000000000000000000000000000000681C0000000000001000000000000000030000000400000008000000000000001000000000000000A70000000100000002000000000000000000000000000000781C000000000000280100000000000000000000000000000800000000
0000000000000000000000C00000000100000042000000000000000000000000000000A01D000000000000B801000000000000000000000D00000004000000000000000000000000000000320000000100000006001000000000000000000000000000801F000000000000800D000000000000030000002F00002880000000000000000000000000000000970000000100000003000000000000000000000000000000002D0000000000007600000000000000000000000000000004000000000000000000000000000000730000000800000043000000000000000000000000000000762D0000000000000400000000000000000000000D00000001000000000000000000000000000000B50000000800000003000000000000000000000000000000782D00000000000009000000000000000000000000000000040000000000000000000000000000000600000005000000B83100000000000000000000000000000000000000000000E000000000000000E00000000000000008000000000000000100000005000000781C000000000000000000000000000000000000000000008810000000000000881000000000000008000000000000000100000006000000002D000000000000000000000000000000000000000000007800000000000000850000000000000008000000000000000100000005000000B83100000000000000000000000000000000000000000000E000000000000000E000000000000000080000000000000008800100000000000000000018800100180000000000000001800C800100180000000000000067656E6572617465644E6174697665506F696E7477697365B67F337F0880010001000000000000000F800100078001002800008095800100088001000200000000000000018069800100020000000000000018800100040000000000000001800C800100040000000000000061726730118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F18800100040000000000000001800C800100040000000000000061726731118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F088001000100000000000000018069800100010000000000000018800100070000000000000001800C8001000700000000000000726573756C7430118001000780010001000000018007800100010000000000000000000000EE7F1180010007800100010000000180078001000100
00000000000001000000EE7F4C8001000A80010000B37F967F188001009E0000000000000001800C8001009E0000000000000050574E285B74656E736F7272742E656C656D656E745F776973655D206C6F63282F776F726B7370616365732F54656E736F7252542D496E63756261746F722F6D6C69722D74656E736F7272742F746573742F5461726765742F4C75612F496E746567726174696F6E54657374732F436C7573746572696E6744796E616D696353686170652F616C6C6F635F656E71756575652E6D6C69723A373A3133292918800100000000000000000001800C80010000000000000000000F800100078001002800008008800100090000000000000007800100FFFFFFFF628001000A8001000155800100038001000201800B80010000000000000000000180548001000100000000000000228001000380010001038001000203800100000380010000AB7FAA7F468001000A80010000B97F9D7F0A8001000007800100020000004A80010008800100FF00000000000000B57F1380010007800100010000000780010020000000078001000800000007800100010000000780010080000000EC7F07800100000000006A7F57800100098001005401000000000000098001000000000000000000A87F0880010060010000000000000880010000000000000000000180088001000100000000000000000000000000000008800100000000000000000009800100010000000000000009800100010000000000000018800100070000000000000001800C8001000700000000000000726573756C74306980010018800100070000000000000001800C8001000700000000000000726573756C7430118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F018089800100020000000000000088800100078001000000000001808680010000000000000000000180078001000000000000000000777F07800100000000000180A58001000000000000000000078001000000000001801880010000000000000000000A80010000018087800100000000000000000001808780010000000000000000000180078001000000000000000000018007800100000000000000000001800780010000000000000000000180188001000000000000000000767F88800100078001000000000001808680010000000000000000000180078001000000000000000000777F07800100000000000180A58001000000000000000000078001000000000001801880010000000000000000000A80010001018087800100000000000000000001808780
010000000000000000000180078001000000000000000000018007800100000000000000000001800780010000000000000000000180188001000000000000000000767F098001000000000000000000098001000000000000000000098001000000000000000000098001000000000000000000A080010001800C8001000101000000000000506172616D657465725479706500506F696E745769736500506172616D657465725375625479706500506F696E745769736545787072657373696F6E004E62496E70757441726773003200496E70757441726773005B2261726730222C202261726731225D004E624F7574707574566172730031004F757470757456617273005B2276617230225D004E62506172616D73003000506172616D73005B5D004E624C69746572616C730030004C69746572616C73005B5D004E624F7065726174696F6E730031004F7065726174696F6E73005B226175746F20636F6E73742076617230203D20707767656E3A3A69506C757328617267302C2061726731293B225D000180098001000200000000000000000000000000000001010000000000005F7F01808C800100010000000000000001808A80010003000000000000000180EA8001000300000000000000078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F757F0180EA8001000300000000000000078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F757F0180EA800100030000000000000007800100FFFFFFFF157F07800100FFFFFFFF157F07800100FFFFFFFF157F757F01808B80010003000000000000000A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F0A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F0A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F737F0180078001000200000000000000000000000100000001800780010001000000000000000100000094800100078001000000000001800180010001000000000000000180C18001000000
0000000000000180D580010000000000000000000180D68001000000000000000000078001000000000001800180010000000000000000000180018001000000000000000000068001008F8B8D8B000000000000998B8D8B"> : tensor<16852xi8> - executor.global @tensorrt_cluster_exec_ctx constant : !executor.opaque<"trtrt_context"> - func.func @main(%arg0: !executor.table, !executor.ptr, i64, i64, i64>, %arg1: !executor.table, !executor.ptr, i64, i64, i64>) -> !executor.table, !executor.ptr, i64, i64, i64> attributes {executor.function_metadata = #executor.func_meta<[memref<1xf32, #executor.memory_type>, memref<1xf32, #executor.memory_type>], [memref<1xf32, #executor.memory_type>], num_output_args = 0>} { - %c32_i64 = executor.constant 32 : i64 - %c24_i64 = executor.constant 24 : i64 - %c16_i64 = executor.constant 16 : i64 - %c8_i64 = executor.constant 8 : i64 - %c4_i64 = executor.constant 4 : i64 - %c40_i64 = executor.constant 40 : i64 - %c0_i64 = executor.constant 0 : i64 - %c1_i64 = executor.constant 1 : i64 - %0 = executor.get_global @tensorrt_cluster_exec_ctx : !executor.opaque<"trtrt_context"> - %1 = executor.get_global @stream0 : !executor.ptr - %2 = executor.table.get %arg0[1] : , !executor.ptr, i64, i64, i64> - %3 = executor.table.get %arg1[1] : , !executor.ptr, i64, i64, i64> - %4 = executor.call @executor_alloc(%c40_i64, %c4_i64) : (i64, i64) -> !executor.ptr - executor.call @_store_i64(%4, %c0_i64, %c1_i64) : (!executor.ptr, i64, i64) -> () - executor.call @_store_i64(%4, %c8_i64, %c1_i64) : (!executor.ptr, i64, i64) -> () - %5 = executor.table.create(%2, %c0_i64, %c1_i64, %c1_i64, %c1_i64, %3, %c0_i64, %c1_i64, %c1_i64, %c1_i64 : !executor.ptr, i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64) : , i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64> - executor.call @_trtrt_enqueue_alloc(%0, %1, %4, %5) : (!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, !executor.table, i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64>) -> () - %6 = executor.call @_load_i64(%4, %c8_i64) 
: (!executor.ptr, i64) -> i64 - %7 = executor.call @_load_i64(%4, %c16_i64) : (!executor.ptr, i64) -> i64 - %8 = executor.call @_inttoptr_i64_i64(%7) : (i64) -> !executor.ptr - %9 = executor.call @_load_i64(%4, %c24_i64) : (!executor.ptr, i64) -> i64 - %10 = executor.call @_load_i64(%4, %c32_i64) : (!executor.ptr, i64) -> i64 - %11 = executor.table.create(%8, %8, %c0_i64, %9, %10 : !executor.ptr, !executor.ptr, i64, i64, i64) : , !executor.ptr, i64, i64, i64> - executor.call @_dealloc(%4) : (!executor.ptr) -> () - return %11 : !executor.table, !executor.ptr, i64, i64, i64> - } - func.func private @executor_init_globals() { - %c16852_i64 = executor.constant 16852 : i64 - %0 = executor.call @__cuda_stream_create() : () -> !executor.ptr - executor.set_global %0, @stream0 : !executor.ptr - %1 = executor.call @_trtrt_create_runtime() : () -> !executor.opaque<"trtrt_runtime"> - executor.set_global %1, @tensorrt_runtime : !executor.opaque<"trtrt_runtime"> - %2 = executor.load_constant_resource @tensorrt_cluster_engine_data : !executor.ptr - %3 = executor.get_global @tensorrt_runtime : !executor.opaque<"trtrt_runtime"> - %4 = executor.call @_trtrt_load(%3, %2, %c16852_i64) : (!executor.opaque<"trtrt_runtime">, !executor.ptr, i64) -> !executor.opaque<"trtrt_engine"> - %5 = executor.call @_trtrt_create_context(%4) : (!executor.opaque<"trtrt_engine">) -> !executor.opaque<"trtrt_context"> - executor.set_global %5, @tensorrt_cluster_exec_ctx : !executor.opaque<"trtrt_context"> - return - } -} -""" - - -def stablehlo_add(): - # Build/parse the main function. - with ir.Context() as context: - m = ir.Module.parse(EXECUTOR) - - # Use the compiler API to compile to executable. 
- client = compiler.CompilerClient(context) - opts = compiler.StableHLOToExecutableOptions( - client, - ["--tensorrt-builder-opt-level=3", "--tensorrt-strongly-typed=false"], - ) - opts.set_debug_options(False, [], "enqueue_alloc") - exe = compiler.compiler_stablehlo_to_executable(client, m.operation, opts) - - # The RuntimeClient can and should persist across multiple Executables, RuntimeSessions, etc. - # It is primarily an interface for creating and manipulating buffers. - client = runtime.RuntimeClient() - stream = client.create_stream() - devices = client.get_devices() - - if len(devices) == 0: - return - - session_options = runtime.RuntimeSessionOptions(num_devices=1, device_id=0) - session = runtime.RuntimeSession(session_options, exe) - - arg0 = client.create_memref( - np.array([1.0], dtype=np.float32).data, - device=devices[0], - stream=stream, - ) - arg1 = client.create_memref( - np.array([2.0], dtype=np.float32).data, - device=devices[0], - stream=stream, - ) - - results = session.execute_function( - "main", in_args=[arg0, arg1], stream=stream, client=client - ) - - data = np.asarray(client.copy_to_host(results[0], stream=stream)) - stream.sync() - - print(data) - - -if __name__ == "__main__": - stablehlo_add()