Add IGpuAllocator to MLIR-TensorRT
jhalakpatel committed Aug 15, 2024
1 parent b6836bb commit dec20fe
Showing 13 changed files with 351 additions and 38 deletions.
4 changes: 2 additions & 2 deletions mlir-tensorrt/README.md
@@ -23,7 +23,7 @@ We currently support only building on Linux x86 systems.
We support building several different ways (only via CMake) depending on use-case.

In each case, the LLVM-Project version that we are currently aligned to is
given in `build_tools/cmake/LLVMCommit.txt`.
given in `build_tools/cmake/LLVMCommit.cmake`.

Note that currently we provide an LLVM patch which essentially cherry-picks the
bug fixes from [this open MLIR PR](https://github.com/llvm/llvm-project/pull/91524).
@@ -82,7 +82,7 @@ git clone https://github.com/llvm/llvm-project.git llvm-project
# Checkout the right commit. Of course, you may try
# a newer commit or your own modified LLVM-Project.
cd llvm-project
git checkout $(cat build_tools/cmake/LLVMCommit.cmake | grep -Po '(?<=").*(?=")')
git checkout $(cat ../build_tools/cmake/LLVMCommit.cmake | grep -Po '(?<=").*(?=")')

# Apply patch from llvm-project PR 91524
git apply ../build_tools/llvm-project.patch
26 changes: 25 additions & 1 deletion mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h
@@ -312,6 +312,30 @@ mtrtScalarValueCastToRuntimeValue(MTRT_ScalarValue v);
MLIR_CAPI_EXPORTED MTRT_Status
mtrtScalarValueGetType(MTRT_ScalarValue scalar, MTRT_ScalarTypeCode *code);

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

typedef struct MTRT_GpuAllocator {
void *ptr;
} MTRT_GpuAllocator;

MLIR_CAPI_EXPORTED bool mtrtGpuAllocatorIsNull(MTRT_GpuAllocator gpuAllocator);

MLIR_CAPI_EXPORTED MTRT_Status
mtrtGpuAllocatorDestroy(MTRT_GpuAllocator gpuAllocator);

MLIR_CAPI_EXPORTED MTRT_Status
mtrtGpuAllocatorAllocate(MTRT_GpuAllocator gpuAllocator, uint64_t size,
                         uint64_t alignment, void **memory);

MLIR_CAPI_EXPORTED MTRT_Status mtrtGpuAllocatorDeallocate(
    MTRT_GpuAllocator gpuAllocator, void *memory, bool *success);
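
For illustration only (not part of this diff), a C++ caller could drive these shims roughly as sketched below; the header does not define how a valid `MTRT_GpuAllocator` handle is produced, so the handle is taken as a parameter, and the include path is assumed from the file location.

#include "mlir-executor-c/Runtime/Runtime.h"

// Sketch: exercise the allocator shims once; error handling is omitted.
static void exerciseGpuAllocator(MTRT_GpuAllocator gpuAllocator) {
  void *memory = nullptr;
  MTRT_Status status = mtrtGpuAllocatorAllocate(gpuAllocator, /*size=*/1024,
                                                /*alignment=*/256, &memory);
  (void)status;

  bool freed = false;
  status = mtrtGpuAllocatorDeallocate(gpuAllocator, memory, &freed);
  (void)status;
}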

//===----------------------------------------------------------------------===//
// MTRT_RuntimeSessionOptions
//===----------------------------------------------------------------------===//
@@ -352,7 +376,7 @@ typedef struct MTRT_RuntimeSession {
that the session only has a read-only view into the Executable for code and
/// constant data. Therefore the Executable must outlive the RuntimeSession.
MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionCreate(
MTRT_RuntimeSessionOptions options, MTRT_Executable executable,
MTRT_RuntimeSessionOptions options, MTRT_Executable executable, MTRT_GpuAllocator allocator,
MTRT_RuntimeSession *result);
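
A minimal sketch of the updated creation call from C++ (the `options`, `executable`, and `gpuAllocator` handles are assumed to have been created elsewhere with the existing C API; not shown in this hunk):

  MTRT_RuntimeSession session;
  MTRT_Status status =
      mtrtRuntimeSessionCreate(options, executable, gpuAllocator, &session);
  // Check `status` with the C API status helpers before using `session`.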

/// Destroy the session. This does not destroy the associated Executable, which
@@ -840,7 +840,8 @@ class RuntimeSession {
sol::state state,
std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator,
std::unique_ptr<AllocTracker> allocTracker,
std::unique_ptr<ResourceTracker> resourceTracker);
std::unique_ptr<ResourceTracker> resourceTracker,
GpuAllocator *gpuAllocator);

ExecutableView getExecutable() const { return executable; }

@@ -854,14 +855,16 @@

ResourceTracker &getResourceTracker() { return *resourceTracker; }

GpuAllocator *getGpuAllocator() { return gpuAllocator; }

private:
RuntimeSessionOptions options;
ExecutableView executable;

std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator;
std::unique_ptr<AllocTracker> allocTracker;
std::unique_ptr<ResourceTracker> resourceTracker;

GpuAllocator *gpuAllocator;
sol::state state;
};

@@ -37,6 +37,6 @@ void registerLuaRuntimeMethods(lua_State *state,
const RuntimeSessionOptions &options,
PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker,
ResourceTracker *resourceTracker);
ResourceTracker *resourceTracker, GpuAllocator *allocator);

} // namespace mlirtrt::runtime
@@ -36,7 +36,7 @@ namespace mlirtrt::runtime {
/// `main` function. It is assumed that `main` takes no arguments and returns an
/// integer result (which is returned if the execution is successful).
/// TODO: this should take a handle to a function for streaming output/errors.
StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript);
StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript,
                                       GpuAllocator *allocator);

/// Synchronously run a serialized executor Executable one time. An `Executable`
/// is essentially a Lua script packaged with metadata and serialized constants
@@ -48,12 +48,12 @@ StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript);
/// execution is successful).
/// TODO: this should take a handle to a function for
/// streaming output/errors.
StatusOr<int64_t> runExecutorExecutable(std::unique_ptr<Executable> executable);
StatusOr<int64_t>
runExecutorExecutable(std::unique_ptr<Executable> executable,
                      GpuAllocator *allocator);

/// Create an execution state. This will setup a Lua environment and invoke
/// global initialization.
StatusOr<std::unique_ptr<RuntimeSession>>
createRuntimeSessionWithLuaBackend(ExecutableView executable,
createRuntimeSessionWithLuaBackend(ExecutableView executable,
                                   GpuAllocator *allocator,
const RuntimeSessionOptions &options);
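
For illustration, a hypothetical C++ caller might wire in a custom allocator as sketched below. `MyGpuAllocator` stands in for a client-provided `GpuAllocator` subclass; because the session keeps only a raw pointer, the allocator must outlive the returned session.

// Sketch (not part of this change): load an executable with a client allocator.
StatusOr<std::unique_ptr<RuntimeSession>>
loadWithCustomAllocator(ExecutableView executable, MyGpuAllocator &allocator) {
  RuntimeSessionOptions options;
  return createRuntimeSessionWithLuaBackend(executable, &allocator, options);
}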

/// Set the primary stream for the loaded executable to use.
@@ -37,7 +37,7 @@ class ResourceTracker;
/// Lua state.
void registerExecutorTensorRTModuleLuaRuntimeMethods(
lua_State *luaState, PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker, ResourceTracker *resourceTracker);
AllocTracker *allocTracker, ResourceTracker *resourceTracker,
GpuAllocator *allocator);

} // namespace mlirtrt::runtime

@@ -32,6 +32,15 @@ namespace mlirtrt {

struct EventPool;

class GpuAllocator {
public:
  virtual ~GpuAllocator() = default;
  /// Allocate GPU memory of at least `size` bytes with the requested
  /// `alignment`. Must be implemented by the client.
  virtual void *allocate(uint64_t size, uint64_t alignment) = 0;
  /// Deallocate memory previously obtained from `allocate`. Returns true if
  /// deallocation succeeds.
  virtual bool deallocate(void *memory) = 0;
};
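
As a concrete but hypothetical example of a client implementation, the sketch below backs the interface with `cudaMalloc`/`cudaFree`; nothing in this diff prescribes that choice.

#include <cuda_runtime_api.h>

#include "mlir-executor/Support/Allocators.h"

namespace {
/// Hypothetical client allocator backed by the CUDA runtime API.
class CudaMallocAllocator : public mlirtrt::GpuAllocator {
public:
  void *allocate(uint64_t size, uint64_t alignment) override {
    // cudaMalloc returns suitably aligned device pointers, so `alignment` is
    // not consulted in this sketch.
    (void)alignment;
    void *memory = nullptr;
    if (cudaMalloc(&memory, size) != cudaSuccess)
      return nullptr;
    return memory;
  }
  bool deallocate(void *memory) override {
    return cudaFree(memory) == cudaSuccess;
  }
};
} // namespace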

//===----------------------------------------------------------------------===//
// PoolTrackedCudaEvent
//===----------------------------------------------------------------------===//
25 changes: 24 additions & 1 deletion mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp
@@ -27,6 +27,7 @@
#include "mlir-executor/Runtime/API/API.h"
#include "mlir-executor/Runtime/API/ExecutableFlatbuffer.h"
#include "mlir-executor/Runtime/Backend/Lua/LuaRuntime.h"
#include "mlir-executor/Support/Allocators.h"
#include "mlir-executor/Support/Status.h"
#include "mlir/Support/FileUtilities.h"
#include "llvm/ADT/SmallVectorExtras.h"
@@ -48,6 +49,8 @@ DEFINE_C_API_PTR_METHODS(MTRT_RuntimeSession,
::mlirtrt::runtime::RuntimeSession)
DEFINE_C_API_PTR_METHODS(MTRT_RuntimeSessionOptions,
::mlirtrt::runtime::RuntimeSessionOptions)
DEFINE_C_API_PTR_METHODS(MTRT_GpuAllocator,
::mlirtrt::GpuAllocator)
DEFINE_C_API_PTR_METHODS(MTRT_Executable, ::mlirtrt::runtime::Executable)
DEFINE_C_API_PTR_METHODS(MTRT_Stream, MTRT_StreamImpl)
DEFINE_C_API_PTR_METHODS(MTRT_RuntimeValue, ::mlirtrt::runtime::RuntimeValue)
@@ -529,6 +532,24 @@ MTRT_ScalarValue mtrtRuntimeValueDynCastToScalar(MTRT_RuntimeValue v) {
return wrap(static_cast<ScalarValue *>(x));
}

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

MTRT_Status mtrtGpuAllocatorAllocate(MTRT_GpuAllocator gpuAllocator,
uint64_t size, uint64_t alignment,
void **memory) {
GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);
*memory = cppGpuAllocator->allocate(size, alignment);
return mtrtStatusGetOk();
}

MTRT_Status mtrtGpuAllocatorDeallocate(MTRT_GpuAllocator gpuAllocator,
                                       void *memory, bool *success) {
GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);
*success = cppGpuAllocator->deallocate(memory);
return mtrtStatusGetOk();
}

//===----------------------------------------------------------------------===//
// MTRT_RuntimeSessionOptions
//===----------------------------------------------------------------------===//
@@ -556,12 +577,14 @@ mtrtRuntimeSessionOptionsDestroy(MTRT_RuntimeSessionOptions options) {

MTRT_Status mtrtRuntimeSessionCreate(MTRT_RuntimeSessionOptions options,
MTRT_Executable executable,
MTRT_GpuAllocator gpuAllocator,
MTRT_RuntimeSession *result) {
RuntimeSessionOptions *cppOptions = unwrap(options);
Executable *cppExecutable = unwrap(executable);
GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);

StatusOr<std::unique_ptr<RuntimeSession>> session =
createRuntimeSessionWithLuaBackend(cppExecutable->getView(), *cppOptions);
createRuntimeSessionWithLuaBackend(cppExecutable->getView(), cppGpuAllocator,
                                   *cppOptions);
if (session.isError())
return wrap(session.getStatus());

7 changes: 4 additions & 3 deletions mlir-tensorrt/executor/lib/Runtime/API/API.cpp
@@ -348,16 +348,17 @@ RuntimeSessionOptions::createUsingSingleHostMpi() {
//===----------------------------------------------------------------------===//
// RuntimeSession
//===----------------------------------------------------------------------===//

RuntimeSession::RuntimeSession(
RuntimeSessionOptions options, ExecutableView exe, sol::state state,
std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator,
std::unique_ptr<AllocTracker> allocTracker,
std::unique_ptr<ResourceTracker> resourceTracker)
std::unique_ptr<ResourceTracker> resourceTracker,
GpuAllocator *gpuAllocator)
: options(std::move(options)), executable(exe),
pinnedMemoryAllocator(std::move(pinnedMemoryAllocator)),
allocTracker(std::move(allocTracker)),
resourceTracker(std::move(resourceTracker)), state(std::move(state)) {}
resourceTracker(std::move(resourceTracker)), gpuAllocator(gpuAllocator),
state(std::move(state)) {}

//===----------------------------------------------------------------------===//
// AllocTracker
22 changes: 11 additions & 11 deletions mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp
@@ -72,7 +72,7 @@ static void registerDefaultDeviceDependentMethods(lua_State *state,

static void registerLuaRuntimeMethodsCommon(
lua_State *state, PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker, ResourceTracker *resourceTracker) {
AllocTracker *allocTracker, ResourceTracker *resourceTracker,
GpuAllocator *allocator) {
registerExecutorCoreModuleLuaRuntimeMethods(state, pinnedMemoryAllocator,
allocTracker);
registerExecutorCUDAModuleLuaRuntimeMethods(
@@ -84,15 +84,15 @@
#endif

registerExecutorTensorRTModuleLuaRuntimeMethods(
state, pinnedMemoryAllocator, allocTracker, resourceTracker);
state, pinnedMemoryAllocator, allocTracker, resourceTracker, allocator);
}

void mlirtrt::runtime::registerLuaRuntimeMethods(
lua_State *state, const RuntimeSessionOptions &options,
PinnedMemoryAllocator *pinnedMemoryAllocator, AllocTracker *allocTracker,
ResourceTracker *resourceTracker) {
ResourceTracker *resourceTracker, GpuAllocator *allocator) {
registerLuaRuntimeMethodsCommon(state, pinnedMemoryAllocator, allocTracker,
resourceTracker);
resourceTracker, allocator);
#ifdef MLIR_EXECUTOR_ENABLE_NCCL
registerExecutorNCCLModuleLuaRuntimeMethods(state, resourceTracker);
registerDeviceDependentNCCLMethods(state, options.getNumDevices(),
@@ -108,7 +108,7 @@
}

StatusOr<int64_t>
mlirtrt::runtime::runExecutorLuaScript(std::string_view luaScript) {
mlirtrt::runtime::runExecutorLuaScript(std::string_view luaScript,
                                       GpuAllocator *allocator) {
ADD_RUNTIME_MODULE_RANGE("runtime_runExecutorLuaScript");

StatusOr<std::unique_ptr<RuntimeClient>> client = RuntimeClient::create();
@@ -120,7 +120,7 @@ mlirtrt::runtime::runExecutorLuaScript(std::string_view luaScript) {
registerLuaRuntimeMethods(lua.lua_state(), RuntimeSessionOptions(),
&(*client)->getPinnedMemorAllocator(),
&(*client)->getAllocTracker(),
&(*client)->getResourceTracker());
&(*client)->getResourceTracker(), allocator);

sol::protected_function_result result = lua.script(luaScript);
if (!result.valid()) {
@@ -171,7 +171,7 @@ static Status maybeCheckForValidNcclUuid(const RuntimeSessionOptions &options) {
/// global initialization.
StatusOr<std::unique_ptr<RuntimeSession>>
mlirtrt::runtime::createRuntimeSessionWithLuaBackend(
ExecutableView executable, const RuntimeSessionOptions &options) {
ExecutableView executable, GpuAllocator *allocator,
const RuntimeSessionOptions &options) {
ADD_RUNTIME_MODULE_RANGE("runtime_loadExecutable");

MTRT_RETURN_IF_ERROR(maybeCheckForValidNcclUuid(options));
@@ -184,7 +184,7 @@ mlirtrt::runtime::createRuntimeSessionWithLuaBackend(
lua.open_libraries(sol::lib::base, sol::lib::string);
registerLuaRuntimeMethods(lua.lua_state(), options,
pinnedMemoryAllocator.get(), allocTracker.get(),
resourceTracker.get());
resourceTracker.get(), allocator);

// Load globals into the context.
// TODO: eliminate this copy, we already own the executable.
@@ -225,11 +225,11 @@ mlirtrt::runtime::createRuntimeSessionWithLuaBackend(
}
return std::make_unique<RuntimeSession>(
options, executable, std::move(lua), std::move(pinnedMemoryAllocator),
std::move(allocTracker), std::move(resourceTracker));
std::move(allocTracker), std::move(resourceTracker), allocator);
}

StatusOr<int64_t> mlirtrt::runtime::runExecutorExecutable(
std::unique_ptr<Executable> executable) {
std::unique_ptr<Executable> executable, GpuAllocator *allocator) {

StatusOr<std::unique_ptr<RuntimeClient>> client = RuntimeClient::create();
if (!client.isOk())
@@ -245,7 +245,7 @@ StatusOr<int64_t> mlirtrt::runtime::runExecutorExecutable(
return options.getStatus();

StatusOr<std::unique_ptr<RuntimeSession>> session =
createRuntimeSessionWithLuaBackend(executable->getView(), *options);
createRuntimeSessionWithLuaBackend(executable->getView(), allocator,
                                   *options);
if (!session.isOk())
return session.getStatus();
