Add trtrt.alloc_enqueue op
jhalakpatel committed Oct 10, 2024
1 parent 8a2ca79 commit a669203
Showing 3 changed files with 247 additions and 83 deletions.
@@ -116,73 +116,57 @@ def TensorRTRuntime_EnqueueOp : TensorRTRuntime_Op<"enqueue", [
}];
}

// //===----------------------------------------------------------------------===//
// // AllocEnqueueOp
// //===----------------------------------------------------------------------===//

// def Output_Desc : TypeDef<CUDA_Dialect, "Output_Desc", []> {
// let mnemonic = "Output_Desc";
// let description = [{
// An opaque object which represents a CUDA stream object (CUstream).
// A CUDA stream contains a sequence of operations that execute on GPU in the
// order in which they are issued by the host.
// }];
// }

// def TensorRTRuntime_AllocEnqueueOp : TensorRTRuntime_Op<"alloc_enqueue", [
// DeclareOpInterfaceMethods<InferTypeOpInterface>,
// DeclareOpInterfaceMethods<TensorKindOpInterface>,
// DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
// AttrSizedOperandSegments,
// DestinationStyleOpInterface
// ]> {
// let description = [{

// Asynchronously executes the computation represented by the
// `execution_context` on the specified CUDA stream. This operation
// is a bufferizable destination-passing-style (DPS) operation.

// This means that the `inputs` and `outputs` can accept either
// all `tensor` types or all `memref` types. If the types are `tensor`
// types, then the values passed to the `outs` parameter must
// be equal in type and number to the operation's results.

// When the `inputs` and `outputs` are `memref` types, then the
// operation should have no results.

// The `host_tensor_args` attribute is a list of indices into the
// `inputs` list indicating which arguments should be host tensors.
// }];

// let arguments = (ins TensorRTRuntime_Context:$execution_context,
// CUDA_Stream:$stream,
// Descriptor:$output_descriptors,
// Variadic<AnyShaped>:$inputs,
// OptionalAttr<DenseI64ArrayAttr>:$host_tensor_args);
// let results = (outs Variadic<AnyType>:$results);

// let assemblyFormat = [{
// $execution_context `stream` `(` $stream `)` ` `
// (`host_tensor_args` $host_tensor_args^ ` ` )?
// `(` $inputs `)` `outs` `(` $outs `)`
// attr-dict `:` functional-type($inputs, $outs)
// }];

// let hasVerifier = 1;

// let extraClassDeclaration = [{
// // Declare the outs as inits/outs to DestinationStyleOpInterface.
// MutableOperandRange getDpsInitsMutable() { return getOutsMutable(); }

// /// Return true if the operand at the specified index is a host tensor
// /// argument.
// bool isOperandOnHost(int64_t operandIdx) {
// if(std::optional<ArrayRef<int64_t>> indices = getHostTensorArgs()) {
// return llvm::is_contained(*indices, operandIdx - 2);
// }
// return false;
// }
// }];
// }
//===----------------------------------------------------------------------===//
// AllocEnqueueOp
//===----------------------------------------------------------------------===//

def Output_Desc : TypeDef<CUDA_Dialect, "Output_Desc", []> {
let mnemonic = "Output_Desc";
let description = [{
An opaque object which describes an output of a TensorRT engine
execution whose result buffer is allocated by the runtime during
enqueue.
}];
}
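// Note: assuming the CUDA dialect's namespace is `cuda`, this type would
// print in IR as `!cuda.Output_Desc`.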

def TensorRTRuntime_AllocEnqueueOp : TensorRTRuntime_Op<"alloc_enqueue", [
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
]> {
let description = [{
Asynchronously executes the computation represented by the
`execution_context` on the specified CUDA stream. Unlike `enqueue`, this
operation is not destination-passing-style: the TensorRT runtime
allocates the result buffers, so results are returned directly rather
than written into `outs` operands. The inputs and results must be either
all `tensor` types or all `memref` types.

The `host_tensor_args` attribute is a list of indices into the
`inputs` list indicating which arguments should be host tensors.
}];

let arguments = (ins
TensorRTRuntime_Context:$execution_context,
CUDA_Stream:$stream,
Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$inputs,
OptionalAttr<DenseI64ArrayAttr>:$host_tensor_args
);

let results = (outs Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$results);

let assemblyFormat = [{
$execution_context `stream` `(` $stream `)` ` `
(`host_tensor_args` $host_tensor_args^ ` ` )?
`(` $inputs `)`
attr-dict `:` functional-type($inputs, $results)
}];

let hasVerifier = 1;

let extraClassDeclaration = [{
/// Return true if the operand at the specified index is a host tensor
/// argument. Operands 0 and 1 are the execution context and the stream,
/// so the indices in `host_tensor_args` are offset by 2 relative to
/// operand numbers.
bool isOperandOnHost(int64_t operandIdx) {
if (std::optional<ArrayRef<int64_t>> indices = getHostTensorArgs()) {
return llvm::is_contained(*indices, operandIdx - 2);
}
}
return false;
}
}];
}
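// Illustrative example of the custom assembly format above (a sketch only;
// the value names, types, and exact attribute spelling are assumptions, not
// taken from a test in this commit):
//
//   %results:2 = trtrt.alloc_enqueue %ctx stream(%stream)
//       host_tensor_args array<i64: 0> (%shape, %input)
//       : (tensor<2xi32>, tensor<?x?xf32>)
//       -> (tensor<?x?xf32>, tensor<?x?xf32>)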

#endif // MLIR_TENSORRT_DIALECT_TENSORRT_IR_TENSORRTRUNTIMEOPS_TD
@@ -43,7 +43,7 @@ LogicalResult EnqueueOp::inferReturnTypes(
SmallVectorImpl<Type> &inferredReturnTypes) {
EnqueueOp::Adaptor adaptor(operands, attributes, properties, regions);

// If the `outs` operands are tensor types, then we shoudl return those as
// If the `outs` operands are tensor types, then we should return those as
// results. Otherwise, for memref outs, we do not return results.
for (Type t : TypeRange(adaptor.getOuts())) {
auto tensorType = dyn_cast<TensorType>(t);
@@ -113,6 +113,61 @@ void EnqueueOp::getEffects(
}
}

//===----------------------------------------------------------------------===//
// AllocEnqueueOp
//===----------------------------------------------------------------------===//

LogicalResult AllocEnqueueOp::verify() {
// Verify host tensor indices.
if (std::optional<ArrayRef<int64_t>> hostTensorIndices =
getHostTensorArgs()) {
// We don't count the context and stream arguments here.
const int64_t numInputArgs = getInputs().size();
for (int64_t idx : *hostTensorIndices) {
if (idx >= numInputArgs || idx < 0)
return emitOpError("host_tensor_args value ")
<< idx << " is out of bounds";
Value operand = getInputs()[idx];
Type elType = mlir::getElementTypeOrSelf(operand.getType());
if (!elType.isInteger(32))
return emitOpError("host tensor arguments must have element type i32, "
"but input arg ")
<< idx << " has type " << operand.getType();
}
}

// Verify that the results are either all tensors or all memrefs.
if (getNumResults() > 0) {
bool allTensors = isa<TensorType>(getResult(0).getType());
for (OpResult result : getResults()) {
if (isa<TensorType>(result.getType()) != allTensors) {
return emitOpError("all results must be of the same kind (all tensors "
"or all memrefs)");
}
}
}
return success();
}
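// As a hypothetical example (not a test from this commit): with
// host_tensor_args = array<i64: 1>, an op whose input #1 has type
// tensor<4xf32> fails verification, since host tensor arguments must have
// element type i32.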

void AllocEnqueueOp::getEffects(
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
// Memref inputs are read by the engine execution.
for (OpOperand &operand : getInputsMutable()) {
if (!llvm::isa<MemRefType>(operand.get().getType()))
continue;
effects.emplace_back(MemoryEffects::Read::get(), &operand,
SideEffects::DefaultResource::get());
}

// This op allocates the buffers backing its memref results and writes the
// computation's outputs into them; tensor results carry no memory effects.
for (OpResult result : getResults()) {
if (!llvm::isa<MemRefType>(result.getType()))
continue;
effects.emplace_back(MemoryEffects::Allocate::get(), result,
SideEffects::DefaultResource::get());
effects.emplace_back(MemoryEffects::Write::get(), result,
SideEffects::DefaultResource::get());
}
}

//===----------------------------------------------------------------------===//
// TensorRTRuntimeDialect Interfaces
//===----------------------------------------------------------------------===//
