Add trtrt.alloc_enqueue op
jhalakpatel committed Oct 10, 2024
1 parent 8a2ca79 commit a669203
Showing 3 changed files with 247 additions and 83 deletions.
@@ -116,73 +116,57 @@ def TensorRTRuntime_EnqueueOp : TensorRTRuntime_Op<"enqueue", [
}];
}

// //===----------------------------------------------------------------------===//
// // AllocEnqueueOp
// //===----------------------------------------------------------------------===//

// def Output_Desc : TypeDef<CUDA_Dialect, "Output_Desc", []> {
// let mnemonic = "Output_Desc";
// let description = [{
// An opaque object which represents a CUDA stream object (CUstream).
// A CUDA stream contains a sequence of operations that execute on GPU in the
// order in which they are issued by the host.
// }];
// }

// def TensorRTRuntime_AllocEnqueueOp : TensorRTRuntime_Op<"alloc_enqueue", [
// DeclareOpInterfaceMethods<InferTypeOpInterface>,
// DeclareOpInterfaceMethods<TensorKindOpInterface>,
// DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
// AttrSizedOperandSegments,
// DestinationStyleOpInterface
// ]> {
// let description = [{

// Asynchronously executes the computation represented by the
// `execution_context` on the specified CUDA stream. This operation
// is a bufferizable destination-passing-style (DPS) operation.

// This means that the `inputs` and `outputs` can accept either
// all `tensor` types or all `memref` types. If the types are `tensor`
// types, then the values passed to the `outs` parameter must
// be equal in type and number to the operation's results.

// When the `inputs` and `outputs` are `memref` types, then the
// operation should have no results.

// The `host_tensor_args` attribute is a list of indices into the
// `inputs` list indicating which arguments should be host tensors.
// }];

// let arguments = (ins TensorRTRuntime_Context:$execution_context,
// CUDA_Stream:$stream,
// Descriptor:$output_descriptors,
// Variadic<AnyShaped>:$inputs,
// OptionalAttr<DenseI64ArrayAttr>:$host_tensor_args);
// let results = (outs Variadic<AnyType>:$results);

// let assemblyFormat = [{
// $execution_context `stream` `(` $stream `)` ` `
// (`host_tensor_args` $host_tensor_args^ ` ` )?
// `(` $inputs `)` `outs` `(` $outs `)`
// attr-dict `:` functional-type($inputs, $outs)
// }];

// let hasVerifier = 1;

// let extraClassDeclaration = [{
// // Declare the outs as inits/outs to DestinationStyleOpInterface.
// MutableOperandRange getDpsInitsMutable() { return getOutsMutable(); }

// /// Return true if the operand at the specified index is a host tensor
// /// argument.
// bool isOperandOnHost(int64_t operandIdx) {
// if(std::optional<ArrayRef<int64_t>> indices = getHostTensorArgs()) {
// return llvm::is_contained(*indices, operandIdx - 2);
// }
// return false;
// }
// }];
// }
//===----------------------------------------------------------------------===//
// AllocEnqueueOp
//===----------------------------------------------------------------------===//

def Output_Desc : TypeDef<CUDA_Dialect, "Output_Desc", []> {
let mnemonic = "Output_Desc";
let description = [{
An opaque object which describes an output of a TensorRT engine
execution whose result buffer is allocated by the runtime during
enqueue.
}];
}
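// Note: assuming the CUDA dialect's namespace is `cuda`, this type would
// print in IR as `!cuda.Output_Desc`.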

def TensorRTRuntime_AllocEnqueueOp : TensorRTRuntime_Op<"alloc_enqueue", [
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
]> {
let description = [{
Asynchronously executes the computation represented by the
`execution_context` on the specified CUDA stream. Unlike `enqueue`, this
operation is not destination-passing-style: the TensorRT runtime
allocates the result buffers, so results are returned directly rather
than written into `outs` operands. The inputs and results must be either
all `tensor` types or all `memref` types.

The `host_tensor_args` attribute is a list of indices into the
`inputs` list indicating which arguments should be host tensors.
}];

let arguments = (ins
TensorRTRuntime_Context:$execution_context,
CUDA_Stream:$stream,
Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$inputs,
OptionalAttr<DenseI64ArrayAttr>:$host_tensor_args
);

let results = (outs Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$results);

let assemblyFormat = [{
$execution_context `stream` `(` $stream `)` ` `
(`host_tensor_args` $host_tensor_args^ ` ` )?
`(` $inputs `)`
attr-dict `:` functional-type($inputs, $results)
}];

let hasVerifier = 1;

let extraClassDeclaration = [{
/// Return true if the operand at the specified index is a host tensor
/// argument. Operands 0 and 1 are the execution context and the stream,
/// so the indices in `host_tensor_args` are offset by 2 relative to
/// operand numbers.
bool isOperandOnHost(int64_t operandIdx) {
if (std::optional<ArrayRef<int64_t>> indices = getHostTensorArgs()) {
return llvm::is_contained(*indices, operandIdx - 2);
}
}
return false;
}
}];
}
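// Illustrative example of the custom assembly format above (a sketch only;
// the value names, types, and exact attribute spelling are assumptions, not
// taken from a test in this commit):
//
//   %results:2 = trtrt.alloc_enqueue %ctx stream(%stream)
//       host_tensor_args array<i64: 0> (%shape, %input)
//       : (tensor<2xi32>, tensor<?x?xf32>)
//       -> (tensor<?x?xf32>, tensor<?x?xf32>)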

#endif // MLIR_TENSORRT_DIALECT_TENSORRT_IR_TENSORRTRUNTIMEOPS_TD
@@ -43,7 +43,7 @@ LogicalResult EnqueueOp::inferReturnTypes(
SmallVectorImpl<Type> &inferredReturnTypes) {
EnqueueOp::Adaptor adaptor(operands, attributes, properties, regions);

// If the `outs` operands are tensor types, then we shoudl return those as
// If the `outs` operands are tensor types, then we should return those as
// results. Otherwise, for memref outs, we do not return results.
for (Type t : TypeRange(adaptor.getOuts())) {
auto tensorType = dyn_cast<TensorType>(t);
@@ -113,6 +113,61 @@ void EnqueueOp::getEffects(
}
}

//===----------------------------------------------------------------------===//
// AllocEnqueueOp
//===----------------------------------------------------------------------===//

LogicalResult AllocEnqueueOp::verify() {
// Verify host tensor indices.
if (std::optional<ArrayRef<int64_t>> hostTensorIndices =
getHostTensorArgs()) {
// We don't count the context and stream arguments here.
const int64_t numInputArgs = getInputs().size();
for (int64_t idx : *hostTensorIndices) {
if (idx >= numInputArgs || idx < 0)
return emitOpError("host_tensor_args value ")
<< idx << " is out of bounds";
Value operand = getInputs()[idx];
Type elType = mlir::getElementTypeOrSelf(operand.getType());
if (!elType.isInteger(32))
return emitOpError("host tensor arguments must have element type i32, "
"but input arg ")
<< idx << " has type " << operand.getType();
}
}

// Verify that the results are either all tensors or all memrefs.
if (getNumResults() > 0) {
bool allTensors = isa<TensorType>(getResult(0).getType());
for (OpResult result : getResults()) {
if (isa<TensorType>(result.getType()) != allTensors) {
return emitOpError("all results must be of the same kind (all tensors "
"or all memrefs)");
}
}
}
return success();
}
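// As a hypothetical example (not a test from this commit): with
// host_tensor_args = array<i64: 1>, an op whose input #1 has type
// tensor<4xf32> fails verification, since host tensor arguments must have
// element type i32.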

void AllocEnqueueOp::getEffects(
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
// Memref inputs are read by the engine execution.
for (OpOperand &operand : getInputsMutable()) {
if (!llvm::isa<MemRefType>(operand.get().getType()))
continue;
effects.emplace_back(MemoryEffects::Read::get(), &operand,
SideEffects::DefaultResource::get());
}

// This op allocates the buffers backing its memref results and writes the
// computation's outputs into them; tensor results carry no memory effects.
for (OpResult result : getResults()) {
if (!llvm::isa<MemRefType>(result.getType()))
continue;
effects.emplace_back(MemoryEffects::Allocate::get(), result,
SideEffects::DefaultResource::get());
effects.emplace_back(MemoryEffects::Write::get(), result,
SideEffects::DefaultResource::get());
}
}

//===----------------------------------------------------------------------===//
// TensorRTRuntimeDialect Interfaces
//===----------------------------------------------------------------------===//
