-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a resource with custom allocator #230
Draft
adayton1
wants to merge
8
commits into
develop
Choose a base branch
from
feature/dayton8/custom_resource
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
5116966
Add first pass at custom resource
adayton1 2a088c2
Add test for custom resource
adayton1 d1dfa68
Fix lots of build errors
adayton1 c64031d
Fix build errors
adayton1 f12d735
Add test case
adayton1 1afed58
Merge branch 'develop' into feature/dayton8/custom_resource
adayton1 5448798
Merge branch 'develop' into feature/dayton8/custom_resource
adayton1 867a3cf
Add experimental branch in RAJA to support custom resources
adayton1 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
#ifndef CARE_CUDA_UMPIRE_RESOURCE_H | ||
#define CARE_CUDA_UMPIRE_RESOURCE_H | ||
|
||
#include "camp/defines.hpp" | ||
|
||
#ifdef CAMP_ENABLE_CUDA | ||
|
||
#include <mutex> // camp/resource/cuda.hpp is missing this include | ||
|
||
#include "camp/resource/cuda.hpp" | ||
|
||
#include "umpire/ResourceManager.hpp" | ||
#include "umpire/Allocator.hpp" | ||
|
||
#include <cuda_runtime.h> | ||
|
||
namespace care { | ||
class CudaUmpireResource : public camp::resources::Cuda { | ||
public: | ||
CudaUmpireResource() : | ||
m_resourceManager{&umpire::ResourceManager::getInstance()} | ||
{ | ||
m_deviceAllocator = m_resourceManager->getAllocator("DEVICE"); | ||
m_pinnedAllocator = m_resourceManager->getAllocator("PINNED"); | ||
m_managedAllocator = m_resourceManager->getAllocator("UM"); | ||
} | ||
|
||
CudaUmpireResource(const umpire::Allocator& deviceAllocator, | ||
const umpire::Allocator& pinnedAllocator, | ||
const umpire::Allocator& managedAllocator) : | ||
m_resourceManager{&umpire::ResourceManager::getInstance()}, | ||
m_deviceAllocator{deviceAllocator}, | ||
m_pinnedAllocator{pinnedAllocator}, | ||
m_managedAllocator{managedAllocator} | ||
{ | ||
} | ||
|
||
// Memory | ||
template <typename T> | ||
T *allocate(size_t size, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Device) { | ||
T *ret = nullptr; | ||
|
||
if (size > 0) { | ||
auto d{camp::resources::device_guard(get_device())}; | ||
|
||
switch (ma) { | ||
case camp::resources::MemoryAccess::Unknown: | ||
case camp::resources::MemoryAccess::Device: | ||
ret = static_cast<T*>(m_deviceAllocator.allocate(sizeof(T) * size)); | ||
break; | ||
case camp::resources::MemoryAccess::Pinned: | ||
// TODO: do a test here for whether managed is *actually* shared | ||
// so we can use the better performing memory | ||
ret = static_cast<T*>(m_pinnedAllocator.allocate(sizeof(T) * size)); | ||
break; | ||
case camp::resources::MemoryAccess::Managed: | ||
ret = static_cast<T*>(m_managedAllocator.allocate(sizeof(T) * size)); | ||
break; | ||
} | ||
} | ||
|
||
return ret; | ||
} | ||
|
||
void *calloc(size_t size, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Device) { | ||
void *p = allocate<char>(size, ma); | ||
this->memset(p, 0, size); | ||
return p; | ||
} | ||
|
||
void deallocate(void *p, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Unknown) { | ||
auto d{camp::resources::device_guard(get_device())}; | ||
|
||
if (ma == camp::resources::MemoryAccess::Unknown) { | ||
ma = get_access_type(p); | ||
} | ||
|
||
switch (ma) { | ||
case camp::resources::MemoryAccess::Device: | ||
m_deviceAllocator.deallocate(p); | ||
break; | ||
case camp::resources::MemoryAccess::Pinned: | ||
// TODO: do a test here for whether managed is *actually* shared | ||
// so we can use the better performing memory | ||
m_pinnedAllocator.deallocate(p); | ||
break; | ||
case camp::resources::MemoryAccess::Managed: | ||
m_managedAllocator.deallocate(p); | ||
break; | ||
case camp::resources::MemoryAccess::Unknown: | ||
::camp::throw_re("Unknown memory access type, cannot free"); | ||
} | ||
} | ||
|
||
void memcpy(void *dst, const void *src, size_t size) { | ||
if (size > 0) { | ||
auto d{camp::resources::device_guard(get_device())}; | ||
camp::resources::Resource resource(*this); | ||
m_resourceManager->copy(dst, const_cast<void*>(src), resource, size); | ||
} | ||
} | ||
|
||
void memset(void *p, int val, size_t size) | ||
{ | ||
if (size > 0) { | ||
auto d{camp::resources::device_guard(get_device())}; | ||
camp::resources::Resource resource(*this); | ||
m_resourceManager->memset(p, val, resource, size); | ||
} | ||
} | ||
|
||
private: | ||
// TODO: Make this a public or protected method in camp | ||
camp::resources::MemoryAccess get_access_type(void *p) { | ||
cudaPointerAttributes a; | ||
cudaError_t status = cudaPointerGetAttributes(&a, p); | ||
if (status == cudaSuccess) { | ||
switch(a.type){ | ||
case cudaMemoryTypeUnregistered: | ||
return camp::resources::MemoryAccess::Unknown; | ||
case cudaMemoryTypeHost: | ||
return camp::resources::MemoryAccess::Pinned; | ||
case cudaMemoryTypeDevice: | ||
return camp::resources::MemoryAccess::Device; | ||
case cudaMemoryTypeManaged: | ||
return camp::resources::MemoryAccess::Managed; | ||
} | ||
} | ||
::camp::throw_re("invalid pointer detected"); | ||
// This return statement exists because compilers do not determine the | ||
// above unconditionally throws | ||
// related: https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void-function-in-constexpr-if-fun | ||
return camp::resources::MemoryAccess::Unknown; | ||
} | ||
|
||
umpire::ResourceManager* m_resourceManager; | ||
|
||
umpire::Allocator m_deviceAllocator; | ||
umpire::Allocator m_pinnedAllocator; | ||
umpire::Allocator m_managedAllocator; | ||
}; // class CudaUmpireResource | ||
} // namespace care | ||
|
||
#endif // CAMP_ENABLE_CUDA | ||
|
||
#endif // CARE_CUDA_UMPIRE_RESOURCE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
////////////////////////////////////////////////////////////////////////////////////// | ||
// Copyright 2020 Lawrence Livermore National Security, LLC and other CARE developers. | ||
// See the top-level LICENSE file for details. | ||
// | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
////////////////////////////////////////////////////////////////////////////////////// | ||
|
||
#include "care/config.h" | ||
|
||
#if defined(CARE_GPUCC) | ||
|
||
// other library headers | ||
#include "gtest/gtest.h" | ||
#include "umpire/strategy/QuickPool.hpp" | ||
|
||
// care headers | ||
#include "care/CudaUmpireResource.h" | ||
#include "care/detail/test_utils.h" | ||
|
||
// Sets up CARE's GPU-wide memory pools (e.g. "DEVICE_POOL" and "PINNED_POOL")
// used by the tests below; must run before the other tests in this file.
GPU_TEST(CudaUmpireResource, gpu_initialization) {
   init_care_for_testing();
}
|
||
// Smoke test: the default constructor pulls the "DEVICE", "PINNED", and "UM"
// allocators from the umpire ResourceManager and must not throw.
GPU_TEST(CudaUmpireResource, DefaultConstructor)
{
   care::CudaUmpireResource resource;
}
|
||
// Smoke test: build custom device/pinned/managed QuickPool allocators and
// construct a resource from them via the allocator-taking constructor.
GPU_TEST(CudaUmpireResource, AllocatorConstructor)
{
   auto& resourceManager = umpire::ResourceManager::getInstance();

   // Custom device pool layered on the GPU-wide "DEVICE_POOL" created by
   // init_care_for_testing() above.
   auto baseDevicePool = resourceManager.getAllocator("DEVICE_POOL");
   auto customDevicePool =
      resourceManager.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_DEVICE_POOL",
                                                                 baseDevicePool,
                                                                 64*1024*1024,
                                                                 16*1024*1024);

   // Custom pinned pool layered on the GPU-wide "PINNED_POOL" created by
   // init_care_for_testing() above.
   auto basePinnedPool = resourceManager.getAllocator("PINNED_POOL");
   auto customPinnedPool =
      resourceManager.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_PINNED_POOL",
                                                                 basePinnedPool,
                                                                 8*1024*1024,
                                                                 2*1024*1024);

   // Managed memory: init_care_for_testing() does not make a unified memory
   // pool, so build one here from Umpire's default "UM" allocator...
   auto baseManagedAllocator = resourceManager.getAllocator("UM");
   auto managedPool =
      resourceManager.makeAllocator<umpire::strategy::QuickPool>("UM_POOL",
                                                                 baseManagedAllocator,
                                                                 128*1024*1024,
                                                                 8*1024*1024);

   // ...then layer the custom managed pool on top of it.
   auto customManagedPool =
      resourceManager.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_UM_POOL",
                                                                 managedPool,
                                                                 8*1024*1024,
                                                                 2*1024*1024);

   care::CudaUmpireResource resource(customDevicePool,
                                     customPinnedPool,
                                     customManagedPool);
}
|
||
#endif // CARE_GPUCC | ||
|
Submodule raja
updated
9 files
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@neelakausik, essentially you will need to create a device pool, a pinned pool, and a unified memory pool per thread, then use those to create a resource per thread. The call to init_care_for_testing() above creates a GPU wide pool called "DEVICE_POOL" and another pool called "PINNED_POOL". Those should be the basis for the per thread device and pinned memory pools. On line 53 I create a GPU wide unified memory pool called "UM_POOL". You will need to create that manually in whatever test cases you have, then have that allocator be the basis for the per thread unified memory pool.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should test out this functionality in standalone CARE. Write some tests and benchmarks that involve RAJA reducers, scans, and CARE loops.