diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
index 8e86f8bd0..4070778fe 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
@@ -128,6 +128,19 @@ struct DeviceOptions : public OptionsProvider<DeviceOptions> {
   llvm::Error finalizeImpl();
 };
 
+struct CommonCompilationOptions
+    : public OptionsProvider<CommonCompilationOptions> {
+public:
+  /// Entrypoint function name.
+  std::string entrypoint = "main";
+
+public:
+  void addToOptions(mlir::OptionsContext &context) {
+    context.addOption("entrypoint", entrypoint, llvm::cl::init("main"),
+                      llvm::cl::desc("entrypoint function name"));
+  }
+};
+
 } // namespace mlirtrt::compiler
 
 #endif // MLIR_TENSORRT_COMPILER_OPTIONS
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
new file mode 100644
index 000000000..3a70598e7
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
@@ -0,0 +1,30 @@
+//===- PassManagerUtils.h ---------------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir/Pass/PassManager.h"
+
+//===----------------------------------------------------------------------===//
+// Common helpers
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult
+setupPassManager(mlir::PassManager &pm,
+                 const mlirtrt::compiler::DebugOptions &options);
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
index e67b07bf2..7ae579193 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
@@ -52,7 +52,8 @@ namespace mlirtrt::compiler {
 class StablehloToExecutableTask;
 
 struct StablehloToExecutableOptions
-    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions> {
+    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions,
+                                 CommonCompilationOptions> {
   /// Initializes the options. The extensions in the provided registry
   /// must be extensions for the StableHloToExecutable task.
   StablehloToExecutableOptions(TaskExtensionRegistry extensions);
@@ -64,9 +65,6 @@ struct StablehloToExecutableOptions
   /// and backend types that support allocating results.
   bool enableNonDPSReturns = false;
 
-  /// Entrypoint function name.
-  std::string entrypoint = "main";
-
   /// Base class for extensions associated with StableHloToExecutableTask.
class ExtensionBase : public TaskExtensionBase { public: diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h new file mode 100644 index 000000000..23525cdd8 --- /dev/null +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h @@ -0,0 +1,88 @@ +//===- TensorRTToExecutable.h -----------------------------------*- C++ -*-===// +// +// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES. +// All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE +#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE + +// TODO (pranavm): MLIR_TRT_TARGET_TENSORRT is only needed because we pull in +// the TranslateToTensorRT.h header. If we move the translation options, we +// won't need it. 
+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+#include "mlir-tensorrt-dialect/Utils/Options.h"
+#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
+#include "mlir-tensorrt/Compiler/Client.h"
+#include "mlir-tensorrt/Compiler/Extension.h"
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir/Support/TypeID.h"
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+// TODO (pranavm): Figure out a better way to reuse TRT translation options -
+// maybe move to options providers?
+struct TensorRTOptions
+    : public mlirtrt::compiler::OptionsProvider<TensorRTOptions> {
+  mlir::tensorrt::TensorRTTranslationOptions options;
+
+  void addToOptions(mlir::OptionsContext &context) {
+    options.addToOptions(context);
+  }
+};
+
+struct TensorRTToExecutableOptions
+    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions,
+                                 TensorRTOptions> {
+
+  TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
+};
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask
+    : public CompilationTask<TensorRTToExecutableTask,
+                             TensorRTToExecutableOptions> {
+public:
+  using Base::Base;
+
+  static void populatePassManager(mlir::PassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+};
+
+/// Register the task/options with the client's registry.
+void registerTensorRTToExecutableTask();
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+// TODO (pranavm): How to do pipeline registration?
+// void registerTensorRTPipelines(); + +} // namespace mlirtrt::compiler + +MLIR_DECLARE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask) + +#endif +#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h index b63b83d7f..8f4eb6c0b 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h @@ -23,6 +23,7 @@ #define REGISTRATION_REGISTERMLIRTENSORRTPASSES_H #include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h" +#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h" #include "mlir-tensorrt/Conversion/Passes.h" #include "mlir-tensorrt/Transforms/Passes.h" #include "mlir/Conversion/Passes.h" @@ -52,6 +53,12 @@ inline void registerAllMlirTensorRtPasses() { mlir::registerTransformsPasses(); mlir::registerConvertPDLToPDLInterp(); + // TODO (pranavm): Check if this needs to be conditional - the TRT passes + // above are not. 
+#ifdef MLIR_TRT_TARGET_TENSORRT + mlirtrt::compiler::registerTensorRTToExecutableTask(); +#endif + #ifdef MLIR_TRT_ENABLE_HLO mlirtrt::compiler::registerStablehloClusteringPipelines(); registerStableHloInputPipelines(); diff --git a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt index 746bd2e81..e95381528 100644 --- a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt +++ b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt @@ -3,6 +3,7 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient Extension.cpp OptionsRegistry.cpp OptionsProviders.cpp + PassManagerUtils.cpp PARTIAL_SOURCES_INTENDED LINK_LIBS PUBLIC @@ -19,6 +20,8 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerStableHloToExecutable StableHloToExecutable.cpp # TODO: TensorRTExtension should be an independent library. TensorRTExtension/TensorRTExtension.cpp + # TODO (pranavm): TensorRTToExecutable should probably be a separate library + TensorRTToExecutable.cpp PARTIAL_SOURCES_INTENDED diff --git a/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp b/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp new file mode 100644 index 000000000..4546f4b31 --- /dev/null +++ b/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp @@ -0,0 +1,43 @@ +//===- PassManagerUtils.cpp -------------------------------------*- C++ -*-===// +// +// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES. +// All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+
+using namespace mlirtrt::compiler;
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Common helpers
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
+                                     const DebugOptions &options) {
+  pm.enableVerifier(true);
+  mlir::applyDefaultTimingPassManagerCLOptions(pm);
+  if (failed(mlir::applyPassManagerCLOptions(pm)))
+    return mlir::failure();
+  if (!options.dumpIRPath.empty()) {
+    pm.enableIRPrintingToFileTree(
+        [](Pass *, Operation *) { return false; },
+        [](Pass *, Operation *) { return true; }, true, false, false,
+        options.dumpIRPath, OpPrintingFlags().elideLargeElementsAttrs(32));
+  }
+  return mlir::success();
+}
diff --git a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
index 3d609a93e..9edf0f23a 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
@@ -28,10 +28,10 @@
 #include "mlir-executor/Support/Status.h"
 #include "mlir-executor/Target/Lua/TranslateToRuntimeExecutable.h"
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
-#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Compiler/OptionsProviders.h"
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
 #include "mlir-tensorrt/Compiler/TensorRTExtension/TensorRTExtension.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
@@ -60,25 +60,6 @@ using namespace mlir;
 
 #ifdef MLIR_TRT_ENABLE_HLO
 
-//===----------------------------------------------------------------------===//
-// Common helpers
-//===----------------------------------------------------------------------===//
-
-static mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
-                                            const DebugOptions &options) {
-  pm.enableVerifier(true);
-  mlir::applyDefaultTimingPassManagerCLOptions(pm);
-  if (failed(mlir::applyPassManagerCLOptions(pm)))
-    return mlir::failure();
-  if (!options.dumpIRPath.empty()) {
-    pm.enableIRPrintingToFileTree(
-        [](Pass *, Operation *) { return false; },
-        [](Pass *, Operation *) { return true; }, true, false, false,
-        options.dumpIRPath, OpPrintingFlags().elideLargeElementsAttrs(32));
-  }
-  return mlir::success();
-}
-
 //===----------------------------------------------------------------------===//
 // Adhoc Passes
 //===----------------------------------------------------------------------===//
@@ -162,9 +143,6 @@ StablehloToExecutableOptions::StablehloToExecutableOptions(
       disallowHostTensorsInTensorRTClusters, llvm::cl::init(false),
       llvm::cl::desc("Don't allow TensorRt clusters to contain host tensor "
                      "calculations (but they can still be inputs)"));
-
-  addOption("entrypoint", entrypoint, llvm::cl::init("main"),
-            llvm::cl::desc("entrypoint function name"));
 }
 
 //===----------------------------------------------------------------------===//
@@ -189,7 +167,7 @@ void StablehloToExecutableTask::buildStablehloClusteringPipeline(
   populateExtensionPasses(pm, opts, Phase::PreClustering);
 
   plan::StablehloClusteringPassOptions clusteringOpts{};
-  clusteringOpts.entrypoint = opts.entrypoint;
+  clusteringOpts.entrypoint = opts.get<CommonCompilationOptions>().entrypoint;
   plan::buildPlanSegmentationPipeline(pm, clusteringOpts);
 
   // Compile outlined funcs marked with `cluster.host`. The HLO in these
@@ -465,7 +443,7 @@ static StablehloToExecutableOptions populateStablehloClusteringPipelineOpts(
       cliOpts.deviceMaxSharedMemoryPerBlockKb;
   opts.get<DeviceOptions>().shouldInferFromHost =
       cliOpts.inferDeviceOptionsFromHost;
-  opts.entrypoint = cliOpts.entrypoint;
+  opts.get<CommonCompilationOptions>().entrypoint = cliOpts.entrypoint;
 
   return opts;
 }
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
new file mode 100644
index 000000000..796644552
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
@@ -0,0 +1,127 @@
+//===- TensorRTToExecutable.cpp ---------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifdef MLIR_TRT_TARGET_TENSORRT
+
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h"
+#include "mlir-executor/Conversion/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir-tensorrt/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlirtrt::compiler;
+
+TensorRTToExecutableOptions::TensorRTToExecutableOptions(
+    TaskExtensionRegistry extensions) {
+  // TODO (pranavm): We don't need extensions - remove from constructor and add
+  // `setExtensions` to base class.
+  assert(extensions.extensions.size() == 0);
+}
+
+void TensorRTToExecutableTask::populatePassManager(
+    mlir::PassManager &pm, const TensorRTToExecutableOptions &options) {
+  if (failed(setupPassManager(pm, options.get<DebugOptions>()))) {
+    /// TODO: Ignored. This can fail if pass manager static CL options were not
+    /// registered/initialized. This happens through invocation of e.g. this
+    /// function in e.g. Python bindings or standalone calls to C++ or C API
+    /// without doing all the typical static CL setup. We should instead be
+    /// accepting a PassManager here that has already been setup to the caller's
+    /// specifications.
+  }
+
+  // Post-clustering
+  pm.addPass(createConvertTensorRTToTensorRTRuntimePass());
+
+  pm.addNestedPass<func::FuncOp>(plan::createPostClusteringValidationPass());
+
+  pm.addPass(createCanonicalizerPass());
+
+  pm.addPass(createInlinerPass());
+  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // We then perform some final simplification on the top-level func.func ops
+  // (e.g. public entrypoint functions).
+  pm.addNestedPass<func::FuncOp>(createSCFDetensorizeLoopsPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // Pre-bufferization
+  // Simplify and translate functions nested in `tensorrt.module` ops.
+  auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
+  tensorrt::buildTensorRTModuleTransformationPipeline(
+      trtPM, options.get<TensorRTOptions>().options.enableStronglyTyped);
+  trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+      nullptr, options.get<TensorRTOptions>().options));
+
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(plan::createPlanAllocTensorsPass());
+  pm.addPass(plan::createPlanBufferizePass());
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(bufferization::createDropEquivalentBufferResultsPass());
+  plan::buildPlanBufferOptimizationPipeline(pm);
+  plan::buildPlanBufferDeallocationPipeline(
+      pm, bufferization::DeallocationOptions{
+              /*privateFuncDynamicOwnership=*/false});
+
+  // Post-bufferization
+  pm.addPass(createConvertMemRefToCUDAPass());
+  pm.addPass(createConvertPlanToExecutorPass());
+  pm.addPass(executor::createExecutorAllocsToGlobalsPass());
+  pm.addNestedPass<func::FuncOp>(
+      executor::createExecutorPopulateFunctionMetadataPass());
+
+  // Executor lowering
+  ConvertTensorRTRuntimeToExecutorPassOptions toExecutorOpts;
+  toExecutorOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  toExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertTensorRTRuntimeToExecutorPass(toExecutorOpts));
+
+  ConvertCUDAToExecutorPassOptions cudaToExecutorOpts;
+  cudaToExecutorOpts.indexBitwidth =
+      options.get<ExecutorOptions>().indexBitwidth;
+  cudaToExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertCUDAToExecutorPass(cudaToExecutorOpts));
+
+  pm.addPass(createDropNestedModulesPass());
+
+  mlir::executor::ConvertStdToExecutorPassOptions stdToExecOpts;
+  stdToExecOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  stdToExecOpts.usePackedMemRefCConv = true;
+  mlir::executor::buildExecutorLoweringPipeline(pm, stdToExecOpts);
+}
+
+void mlirtrt::compiler::registerTensorRTToExecutableTask() {
+  registerOption("tensorrt-to-executable",
+                 optionsCreateFromArgs<TensorRTToExecutableOptions>);
+}
+
+MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif