[mlir-tensorrt] Add TensorRT 8.6 support (#391)
This PR makes the following changes:
    - Make TensorRT 10.5 the default version.
    - Add TensorRT 8.6 download support.
    - Add TensorRT 8.6 to CI.
    - Remove the TensorRT 9 checks from CI to work around a device
      disk-space error.
    - Fix tests to support the above changes.
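For example, a tree configured against the newly supported TensorRT 8.6 download looks roughly like the CI jobs below (a sketch: the source/build paths are placeholders, and the -D flags are copied from this diff):

    # Sketch only; paths are placeholders. Version 8.6.1.6 resolves through
    # the new TensorRT 8 download branch added to Dependencies.cmake here.
    cmake -S mlir-tensorrt -B build \
      -DCMAKE_BUILD_TYPE=RelWithDebInfo \
      -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
      -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=8.6.1.6 \
      -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
      -DMLIR_TRT_USE_LINKER=lld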
shelkesagar29 authored Nov 20, 2024
1 parent 3a8362c commit 89e2090
Showing 10 changed files with 422 additions and 378 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/mlir-tensorrt-ci.yml
@@ -148,7 +148,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.2 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
@@ -191,7 +191,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.2 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF \
@@ -209,8 +209,8 @@ jobs:
bash build_and_test.sh
- # Run LIT tests with TensorRT 9
- - name: Run MLIR-TensorRT lit tests with TensorRT 9
+ # Run LIT tests with TensorRT 8
+ - name: Run MLIR-TensorRT lit tests with TensorRT 8
uses: addnab/docker-run-action@v3
with:
image: ${{ env.DEFAULT_IMAGE }}
@@ -235,7 +235,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=9.2.0.5 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=8.6.1.6 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
@@ -324,7 +324,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.2 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
@@ -367,7 +367,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.2 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF \
@@ -385,8 +385,8 @@ jobs:
bash build_and_test.sh
- # Run LIT tests with TensorRT 9
- - name: Run MLIR-TensorRT lit tests with TensorRT 9
+ # Run LIT tests with TensorRT 8
+ - name: Run MLIR-TensorRT lit tests with TensorRT 8
uses: addnab/docker-run-action@v3
with:
image: ${{ env.DEFAULT_IMAGE }}
@@ -411,7 +411,7 @@ jobs:
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-DMLIR_TRT_ENABLE_ASSERTIONS=ON \
- -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=9.2.0.5 \
+ -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=8.6.1.6 \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DMLIR_TRT_USE_LINKER=lld \
-DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
2 changes: 1 addition & 1 deletion mlir-tensorrt/CMakeLists.txt
@@ -55,7 +55,7 @@ mtrt_option(MLIR_TRT_ENABLE_EXECUTOR "Build the Executor dialect and MLIR-Tensor
mtrt_option(MLIR_TRT_ENABLE_NCCL "Enable the NCCL runtime module" ON)

set(MLIR_TRT_TENSORRT_DIR "" CACHE STRING "Path to TensorRT install directory")
set(MLIR_TRT_DOWNLOAD_TENSORRT_VERSION "10.2" CACHE STRING
set(MLIR_TRT_DOWNLOAD_TENSORRT_VERSION "10.5" CACHE STRING
"Version of TensorRT to download and use. It overrides MLIR_TRT_TENSORRT_DIR.")
set(MLIR_TRT_PACKAGE_CACHE_DIR "" CACHE STRING "Directory where to cache downloaded C++ packages")
set(MLIR_TRT_USE_LINKER "" CACHE STRING "Specify a linker to use (e.g. LLD); this is just an alias for LLVM_USE_LINKER")
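Per the option's help text, a set download version overrides MLIR_TRT_TENSORRT_DIR. A hypothetical cache-preload sketch for building against a local install instead (that clearing the download version disables the download path is an assumption this diff does not confirm):

    # Hypothetical: prefer a local TensorRT install over a download.
    # Assumes an empty MLIR_TRT_DOWNLOAD_TENSORRT_VERSION disables the
    # download; the help text only states the download version takes priority.
    set(MLIR_TRT_DOWNLOAD_TENSORRT_VERSION "" CACHE STRING "")
    set(MLIR_TRT_TENSORRT_DIR "/opt/TensorRT-10.5.0.18" CACHE STRING "")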
38 changes: 34 additions & 4 deletions mlir-tensorrt/build_tools/cmake/Dependencies.cmake
@@ -86,11 +86,15 @@ function(download_tensorrt)
if(ARG_VERSION VERSION_EQUAL "10.2")
set(ARG_VERSION "10.2.0.19")
endif()
# Canonicalize "10.5" version by setting it to the latest public TRT 10.5 version.
if(ARG_VERSION VERSION_EQUAL "10.5")
set(ARG_VERSION "10.5.0.18")
endif()

set(downloadable_versions
"9.0.1.4" "9.1.0.4" "9.2.0.5"
"8.6.1.6" "9.0.1.4" "9.1.0.4" "9.2.0.5"
"10.0.0.6" "10.1.0.27"
"10.2.0.19"
"10.2.0.19" "10.5.0.18"
)

if(NOT ARG_VERSION IN_LIST downloadable_versions)
@@ -100,6 +104,28 @@

set(TRT_VERSION "${ARG_VERSION}")

+ # Handle TensorRT 8 versions. These are publicly accessible download links.
+ if(ARG_VERSION VERSION_LESS 9.0.0 AND ARG_VERSION VERSION_GREATER 8.0.0)
+   string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" trt_short_version ${ARG_VERSION})
+   set(CUDA_VERSION "12.0")
+   set(OS "linux")
+   EXECUTE_PROCESS(COMMAND uname -m
+     COMMAND tr -d '\n'
+     OUTPUT_VARIABLE ARCH)
+   if(ARCH STREQUAL "arm64")
+     set(ARCH "aarch64")
+     set(OS "Ubuntu-20.04")
+   elseif(ARCH STREQUAL "amd64")
+     set(ARCH "x86_64")
+     set(OS "Linux")
+   elseif(ARCH STREQUAL "aarch64")
+     set(OS "Ubuntu-20.04")
+   elseif(NOT (ARCH STREQUAL "x86_64"))
+     message(FATAL_ERROR "Direct download not available for architecture: ${ARCH}")
+   endif()
+   set(_url "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/${trt_short_version}/tars/TensorRT-${TRT_VERSION}.${OS}.${ARCH}-gnu.cuda-${CUDA_VERSION}.tar.gz")
+ endif()

# Handle TensorRT 9 versions. These are publicly accessible download links.
if(ARG_VERSION VERSION_LESS 10.0.0 AND ARG_VERSION VERSION_GREATER 9.0.0)
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" trt_short_version ${ARG_VERSION})
@@ -137,19 +163,23 @@ function(download_tensorrt)
set(_url "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-12.5.tar.gz")
endif()

+ if(ARG_VERSION VERSION_EQUAL 10.5.0.18)
+   set(_url "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz")
+ endif()

if(NOT _url)
message(FATAL_ERROR "Could not determine TensorRT download URL")
endif()

message(STATUS "TensorRT Download URL: ${_url}")

CPMAddPackage(
- NAME TensorRT9
+ NAME TensorRT
VERSION "${TRT_VERSION}"
URL ${_url}
DOWNLOAD_ONLY
)
set("${ARG_OUT_VAR}" "${TensorRT9_SOURCE_DIR}" PARENT_SCOPE)
set("${ARG_OUT_VAR}" "${TensorRT_SOURCE_DIR}" PARENT_SCOPE)
endfunction()

#-------------------------------------------------------------------------------------
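A hypothetical call site for this helper (the VERSION/OUT_VAR keyword names are inferred from the ARG_VERSION and ARG_OUT_VAR variables in the function body; the argument-parsing code is not shown in this diff):

    # Hypothetical usage; keyword names inferred from ARG_VERSION/ARG_OUT_VAR.
    # "10.5" is canonicalized to "10.5.0.18" by the mapping logic above.
    download_tensorrt(VERSION "10.5" OUT_VAR trt_dir)
    message(STATUS "TensorRT extracted to: ${trt_dir}")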
@@ -2818,10 +2818,15 @@ struct PadConverter : public ConvertHloOpToTensorRTPattern<stablehlo::PadOp> {
auto padLowHighSum = trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
loc, targetTrtMajorVersion, shapeTensorType, padLowConst, padHighConst,
tensorrt::ElementWiseOperation::kSUM);
+ if (!padLowHighSum)
+   return failure();
Value size = padLowHighSum.getResult();
- size = trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
+ auto sumWithResult = trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
loc, targetTrtMajorVersion, shapeTensorType, size, shape.getResult(),
tensorrt::ElementWiseOperation::kSUM);
+ if (!sumWithResult)
+   return failure();
+ size = sumWithResult.getResult();

SmallVector<int32_t> stride(inputType.getRank(), 1);
return trtRewriter.checkAndReplaceOpWithNewOp<tensorrt::SliceOp>(
@@ -3858,7 +3863,7 @@ struct ConvertScatterToTensorRTScatterElements
if (!constOneTuple)
return failure();

- Value newIndices = trtRewriter.checkAndCreate<tensorrt::LinspaceOp>(
+ auto newIndices = trtRewriter.checkAndCreate<tensorrt::LinspaceOp>(
op->getLoc(), targetTrtMajorVersion,
newUpdateType.clone(rewriter.getI32Type()), Value(), startIndex,
constOneTuple, FloatAttr(), FloatAttr());
@@ -3884,7 +3889,7 @@ struct ConvertScatterToTensorRTScatterElements
auto newOp = trtRewriter.checkAndCreate<tensorrt::ScatterElementsOp>(
op->getLoc(), targetTrtMajorVersion,
/*data*/ convertToI32(adaptor.getInputs().front()),
- /*indices*/ newIndices,
+ /*indices*/ newIndices.getResult(),
/*updates*/ convertToI32(newUpdates),
/*axis*/ rewriter.getI64IntegerAttr(axis));
if (!newOp)
@@ -3894,7 +3899,8 @@
auto newOp = trtRewriter.checkAndCreate<tensorrt::ScatterElementsOp>(
op->getLoc(), targetTrtMajorVersion,
/*data*/ adaptor.getInputs().front(),
- /*indices*/ newIndices, /*updates*/ newUpdates.getResult(),
+ /*indices*/ newIndices.getResult(),
+ /*updates*/ newUpdates.getResult(),
/*axis*/ rewriter.getI64IntegerAttr(axis));
if (!newOp)
return failure();
@@ -4327,24 +4333,32 @@ struct DynamicUpdateSliceToConcatConverter
// start and shape to be the values appropriate for !hasNonZeroUpdateStart
// (static case). We will update them in the condition block.
// Calculate the slice start = update offset + update size.
- TypedValue<RankedTensorType> concatDimOffset =
-   trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
-     loc, targetTrtMajorVersion, updateStartOffset,
-     tensorrt::createConstShapeTensor(
-       rewriter, loc,
-       {static_cast<int32_t>(updateType.getDimSize(*concatAxis))}),
-     tensorrt::ElementWiseOperation::kSUM);
+ auto sliceStart = trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
+   loc, targetTrtMajorVersion, updateStartOffset,
+   tensorrt::createConstShapeTensor(
+     rewriter, loc,
+     {static_cast<int32_t>(updateType.getDimSize(*concatAxis))}),
+   tensorrt::ElementWiseOperation::kSUM);
+ if (!sliceStart)
+   return failure();
+ TypedValue<RankedTensorType> concatDimOffset = sliceStart.getResult();

TypedValue<RankedTensorType> endOffset = tensorrt::scatterShapeTensor(
rewriter, loc, SmallVector<int64_t>(updateType.getRank(), 0),
*concatAxis, concatDimOffset);
// Calculate the slice size = result shape - update offset.
- TypedValue<RankedTensorType> finalPartDimSize =
+ auto finalPartDimSizeOp =
trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
loc, targetTrtMajorVersion,
tensorrt::createConstShapeTensor(
rewriter, loc,
{static_cast<int32_t>(resultType.getDimSize(*concatAxis))}),
concatDimOffset, tensorrt::ElementWiseOperation::kSUB);
+ if (!finalPartDimSizeOp)
+   return failure();
+ TypedValue<RankedTensorType> finalPartDimSize =
+   finalPartDimSizeOp.getResult();

TypedValue<RankedTensorType> endShape = tensorrt::scatterShapeTensor(
rewriter, loc, resultType.getShape(), *concatAxis, finalPartDimSize);

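Every C++ hunk above applies the same fix: checkAndCreate<OpTy> can fail for the targeted TensorRT major version, so its return value must be tested before getResult() is called on it. A minimal sketch of the pattern (the operand names lhs/rhs are invented for illustration; the rest mirrors the hunks):

    // Create the op, check for failure, and only then take its result.
    auto sum = trtRewriter.checkAndCreate<tensorrt::ElementWiseOp>(
        loc, targetTrtMajorVersion, shapeTensorType, lhs, rhs,
        tensorrt::ElementWiseOperation::kSUM);
    if (!sum)
      return failure(); // abort the rewrite instead of using a null op
    Value size = sum.getResult();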
@@ -1,4 +1,4 @@
- // RUN: mlir-tensorrt-opt -split-input-file %s --convert-stablehlo-to-tensorrt=convert-loops | FileCheck %s
+ // RUN: mlir-tensorrt-opt -split-input-file %s --convert-stablehlo-to-tensorrt="convert-loops=true trt-major-version=10" | FileCheck %s

func.func @while() -> tensor<i32> {
%arg0 = stablehlo.constant dense<0> : tensor<i32>
@@ -1,4 +1,4 @@
- // RUN: mlir-tensorrt-opt -split-input-file %s --convert-stablehlo-to-tensorrt -verify-diagnostics | FileCheck %s
+ // RUN: mlir-tensorrt-opt -split-input-file %s --convert-stablehlo-to-tensorrt="trt-major-version=10" -verify-diagnostics | FileCheck %s

func.func @stablehlo_all_reduce_region(%arg0 : tensor<f32>) -> tensor<f32> {
%0 = "stablehlo.all_reduce"(%arg0) ({
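Both RUN lines now pin the conversion pass to TensorRT 10 through the trt-major-version pass option. Presumably the same option can target the newly supported major version 8, e.g. (an assumption; only trt-major-version=10 appears in this diff):

    # Assumed invocation; this diff only shows trt-major-version=10.
    mlir-tensorrt-opt -split-input-file test.mlir \
      --convert-stablehlo-to-tensorrt="trt-major-version=8"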