From 51169662b633fa676b8b90c1c90f1399c17ea770 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 07:44:57 -0700 Subject: [PATCH 1/6] Add first pass at custom resource --- src/care/CMakeLists.txt | 1 + src/care/CudaUmpireResource.h | 119 ++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 src/care/CudaUmpireResource.h diff --git a/src/care/CMakeLists.txt b/src/care/CMakeLists.txt index 6c441ae0..d15385f0 100644 --- a/src/care/CMakeLists.txt +++ b/src/care/CMakeLists.txt @@ -27,6 +27,7 @@ set(care_headers care_inst.h CHAICallback.h CHAIDataGetter.h + CudaUmpireResource.h GPUWatchpoint.h Debug.h DefaultMacros.h diff --git a/src/care/CudaUmpireResource.h b/src/care/CudaUmpireResource.h new file mode 100644 index 00000000..1e6d4056 --- /dev/null +++ b/src/care/CudaUmpireResource.h @@ -0,0 +1,119 @@ +#ifndef CARE_CUDA_UMPIRE_RESOURCE_H +#define CARE_CUDA_UMPIRE_RESOURCE_H + +#include "camp/defines.hpp" + +#ifdef CAMP_ENABLE_CUDA + +#include "camp/resource/cuda.hpp" + +#include "umpire/ResourceManager.hpp" +#include "umpire/Allocator.hpp" + +#include + +namespace care { + class CudaUmpireResource : public camp::resources::Cuda { + public: + CudaUmpireResource() : + m_resourceManager{&umpire::ResourceManager::getInstance()} + { + m_deviceAllocator = &m_resourceManager->getAllocator("DEVICE"); + m_pinnedAllocator = &m_resourceManager->getAllocator("PINNED"); + m_managedAllocator = &m_resourceManager->getAllocator("UM"); + } + + CudaUmpireResource(const umpire::Allocator& deviceAllocator, + const umpire::Allocator& pinnedAllocator, + const umpire::Allocator& managedAllocator) : + m_resourceManager{&umpire::ResourceManager::getInstance()}, + m_deviceAllocator{&deviceAllocator}, + m_pinnedAllocator{&pinnedAllocator}, + m_managedAllocator{&managedAllocator} + { + } + + // Memory + template <typename T> + T *allocate(size_t size, MemoryAccess ma = MemoryAccess::Device) { + T *ret = nullptr; + + if (size > 0) { + auto
d{device_guard(device)}; + + switch (ma) { + case MemoryAccess::Unknown: + case MemoryAccess::Device: + ret = static_cast<T *>(m_deviceAllocator.allocate(sizeof(T) * size)); + break; + case MemoryAccess::Pinned: + // TODO: do a test here for whether managed is *actually* shared + // so we can use the better performing memory + ret = static_cast<T *>(m_pinnedAllocator.allocate(sizeof(T) * size)); + break; + case MemoryAccess::Managed: + ret = static_cast<T *>(m_managedAllocator.allocate(sizeof(T) * size)); + break; + } + } + + return ret; + } + + void *calloc(size_t size, MemoryAccess ma = MemoryAccess::Device) { + void *p = allocate<char>(size, ma); + this->memset(p, 0, size); + return p; + } + + void deallocate(void *p, MemoryAccess ma = MemoryAccess::Unknown) { + auto d{device_guard(device)}; + + if (ma == MemoryAccess::Unknown) { + ma = get_access_type(p); + } + + switch (ma) { + case MemoryAccess::Device: + m_deviceAllocator.deallocate(p); + break; + case MemoryAccess::Pinned: + // TODO: do a test here for whether managed is *actually* shared + // so we can use the better performing memory + m_pinnedAllocator.deallocate(p); + break; + case MemoryAccess::Managed: + m_managedAllocator.deallocate(p); + break; + case MemoryAccess::Unknown: + ::camp::throw_re("Unknown memory access type, cannot free"); + } + } + + void memcpy(void *dst, const void *src, size_t size) { + if (size > 0) { + auto d{device_guard(device)}; + m_resourceManager->copy(dst, src, *this, size); + } + } + + void memset(void *p, int val, size_t size) + { + if (size > 0) { + auto d{device_guard(device)}; + m_resourceManager->memset(p, val, *this, size); + } + } + + private: + umpire::ResourceManager* m_resourceManager; + + umpire::Allocator* m_deviceAllocator; + umpire::Allocator* m_pinnedAllocator; + umpire::Allocator* m_managedAllocator; + }; // class CudaUmpireResource +} // namespace care + +#endif // CAMP_ENABLE_CUDA + +#endif // CARE_CUDA_UMPIRE_RESOURCE_H From 2a088c2fab952569fa8c18c2669c123c8c1c2ac6 Mon Sep 17
00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 07:51:47 -0700 Subject: [PATCH 2/6] Add test for custom resource --- test/CMakeLists.txt | 15 +++++++++++++++ test/TestCudaUmpireResource.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/TestCudaUmpireResource.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 44db52c4..90f98867 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -163,3 +163,18 @@ target_include_directories(Benchmarks blt_add_test( NAME Benchmarks COMMAND Benchmarks ) + +if (ENABLE_CUDA) + blt_add_executable( NAME TestCudaUmpireResource + SOURCES TestCudaUmpireResource.cpp + DEPENDS_ON ${care_test_dependencies} ) + + target_include_directories(TestCudaUmpireResource + PRIVATE ${PROJECT_SOURCE_DIR}/src) + + target_include_directories(TestCudaUmpireResource + PRIVATE ${PROJECT_BINARY_DIR}/include) + + blt_add_test( NAME TestCudaUmpireResource + COMMAND TestCudaUmpireResource ) +endif () diff --git a/test/TestCudaUmpireResource.cpp b/test/TestCudaUmpireResource.cpp new file mode 100644 index 00000000..04c8485d --- /dev/null +++ b/test/TestCudaUmpireResource.cpp @@ -0,0 +1,33 @@ +////////////////////////////////////////////////////////////////////////////////////// +// Copyright 2020 Lawrence Livermore National Security, LLC and other CARE developers. +// See the top-level LICENSE file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////////////// + +#include "care/config.h" + +#if defined(CARE_GPUCC) + +// other library headers +#include "gtest/gtest.h" + +// care headers +#include "care/CudaUmpireResource.h" +#include "care/DefaultMacros.h" +#include "care/policies.h" +#include "care/detail/test_utils.h" + +GPU_TEST(CudaUmpireResource, gpu_initialization) { + printf("Initializing\n"); + init_care_for_testing(); + printf("Initialized... 
Testing care::array\n"); +} + +GPU_TEST(CudaUmpireResource, DefaultConstructor) +{ + care::CudaUmpireResource resource; +} + +#endif // CARE_GPUCC + From d1dfa688fcd87d61951938292dbbbcff5f0b45c1 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 08:11:49 -0700 Subject: [PATCH 3/6] Fix lots of build errors --- src/care/CudaUmpireResource.h | 75 +++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/src/care/CudaUmpireResource.h b/src/care/CudaUmpireResource.h index 1e6d4056..54d3f891 100644 --- a/src/care/CudaUmpireResource.h +++ b/src/care/CudaUmpireResource.h @@ -5,6 +5,8 @@ #ifdef CAMP_ENABLE_CUDA +#include // camp/resource/cuda.hpp is missing this include + #include "camp/resource/cuda.hpp" #include "umpire/ResourceManager.hpp" @@ -18,40 +20,40 @@ namespace care { CudaUmpireResource() : m_resourceManager{&umpire::ResourceManager::getInstance()} { - m_deviceAllocator = &m_resourceManager->getAllocator("DEVICE"); - m_pinnedAllocator = &m_resourceManager->getAllocator("PINNED"); - m_managedAllocator = &m_resourceManager->getAllocator("UM"); + m_deviceAllocator = m_resourceManager->getAllocator("DEVICE"); + m_pinnedAllocator = m_resourceManager->getAllocator("PINNED"); + m_managedAllocator = m_resourceManager->getAllocator("UM"); } CudaUmpireResource(const umpire::Allocator& deviceAllocator, const umpire::Allocator& pinnedAllocator, const umpire::Allocator& managedAllocator) : m_resourceManager{&umpire::ResourceManager::getInstance()}, - m_deviceAllocator{&deviceAllocator}, - m_pinnedAllocator{&pinnedAllocator}, - m_managedAllocator{&managedAllocator} + m_deviceAllocator{deviceAllocator}, + m_pinnedAllocator{pinnedAllocator}, + m_managedAllocator{managedAllocator} { } // Memory template <typename T> - T *allocate(size_t size, MemoryAccess ma = MemoryAccess::Device) { + T *allocate(size_t size, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Device) { T *ret = nullptr; if (size > 0) { - auto
d{device_guard(device)}; + auto d{camp::resources::device_guard(get_device())}; switch (ma) { - case MemoryAccess::Unknown: - case MemoryAccess::Device: + case camp::resources::MemoryAccess::Unknown: + case camp::resources::MemoryAccess::Device: ret = static_cast<T *>(m_deviceAllocator.allocate(sizeof(T) * size)); break; - case MemoryAccess::Pinned: + case camp::resources::MemoryAccess::Pinned: // TODO: do a test here for whether managed is *actually* shared // so we can use the better performing memory ret = static_cast<T *>(m_pinnedAllocator.allocate(sizeof(T) * size)); break; - case MemoryAccess::Managed: + case camp::resources::MemoryAccess::Managed: ret = static_cast<T *>(m_managedAllocator.allocate(sizeof(T) * size)); break; } @@ -60,39 +62,39 @@ namespace care { return ret; } - void *calloc(size_t size, MemoryAccess ma = MemoryAccess::Device) { + void *calloc(size_t size, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Device) { void *p = allocate<char>(size, ma); this->memset(p, 0, size); return p; } - void deallocate(void *p, MemoryAccess ma = MemoryAccess::Unknown) { - auto d{device_guard(device)}; + void deallocate(void *p, camp::resources::MemoryAccess ma = camp::resources::MemoryAccess::Unknown) { + auto d{camp::resources::device_guard(get_device())}; - if (ma == MemoryAccess::Unknown) { + if (ma == camp::resources::MemoryAccess::Unknown) { ma = get_access_type(p); } switch (ma) { - case MemoryAccess::Device: + case camp::resources::MemoryAccess::Device: m_deviceAllocator.deallocate(p); break; - case MemoryAccess::Pinned: + case camp::resources::MemoryAccess::Pinned: // TODO: do a test here for whether managed is *actually* shared // so we can use the better performing memory m_pinnedAllocator.deallocate(p); break; - case MemoryAccess::Managed: + case camp::resources::MemoryAccess::Managed: m_managedAllocator.deallocate(p); break; - case MemoryAccess::Unknown: + case camp::resources::MemoryAccess::Unknown: ::camp::throw_re("Unknown memory access type,
cannot free"); } } void memcpy(void *dst, const void *src, size_t size) { if (size > 0) { - auto d{device_guard(device)}; + auto d{camp::resources::device_guard(get_device())}; m_resourceManager->copy(dst, src, *this, size); } } @@ -100,17 +102,40 @@ namespace care { void memset(void *p, int val, size_t size) { if (size > 0) { - auto d{device_guard(device)}; + auto d{camp::resources::device_guard(get_device())}; m_resourceManager->memset(p, val, *this, size); } } private: + // TODO: Make this a public or protected method in camp + camp::resources::MemoryAccess get_access_type(void *p) { + cudaPointerAttributes a; + cudaError_t status = cudaPointerGetAttributes(&a, p); + if (status == cudaSuccess) { + switch(a.type){ + case cudaMemoryTypeUnregistered: + return camp::resources::MemoryAccess::Unknown; + case cudaMemoryTypeHost: + return camp::resources::MemoryAccess::Pinned; + case cudaMemoryTypeDevice: + return camp::resources::MemoryAccess::Device; + case cudaMemoryTypeManaged: + return camp::resources::MemoryAccess::Managed; + } + } + ::camp::throw_re("invalid pointer detected"); + // This return statement exists because compilers do not determine the + // above unconditionally throws + // related: https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void-function-in-constexpr-if-fun + return camp::resources::MemoryAccess::Unknown; + } + umpire::ResourceManager* m_resourceManager; - umpire::Allocator* m_deviceAllocator; - umpire::Allocator* m_pinnedAllocator; - umpire::Allocator* m_managedAllocator; + umpire::Allocator m_deviceAllocator; + umpire::Allocator m_pinnedAllocator; + umpire::Allocator m_managedAllocator; }; // class CudaUmpireResource } // namespace care From c64031d37d56e92e733950ed474bf8455e2730e5 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 08:21:32 -0700 Subject: [PATCH 4/6] Fix build errors --- src/care/CudaUmpireResource.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/src/care/CudaUmpireResource.h b/src/care/CudaUmpireResource.h index 54d3f891..e01fd0f1 100644 --- a/src/care/CudaUmpireResource.h +++ b/src/care/CudaUmpireResource.h @@ -95,7 +95,8 @@ namespace care { void memcpy(void *dst, const void *src, size_t size) { if (size > 0) { auto d{camp::resources::device_guard(get_device())}; - m_resourceManager->copy(dst, src, *this, size); + camp::resources::Resource resource(*this); + m_resourceManager->copy(dst, const_cast<void*>(src), resource, size); } } @@ -103,7 +104,8 @@ namespace care { { if (size > 0) { auto d{camp::resources::device_guard(get_device())}; - m_resourceManager->memset(p, val, *this, size); + camp::resources::Resource resource(*this); + m_resourceManager->memset(p, val, resource, size); } } From f12d735cc5eb161c7e297702c57b01950195ea74 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 08:38:41 -0700 Subject: [PATCH 5/6] Add test case --- test/TestCudaUmpireResource.cpp | 46 ++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/test/TestCudaUmpireResource.cpp b/test/TestCudaUmpireResource.cpp index 04c8485d..93f9f33f 100644 --- a/test/TestCudaUmpireResource.cpp +++ b/test/TestCudaUmpireResource.cpp @@ -11,17 +11,14 @@ // other library headers #include "gtest/gtest.h" +#include "umpire/strategy/QuickPool.hpp" // care headers #include "care/CudaUmpireResource.h" -#include "care/DefaultMacros.h" -#include "care/policies.h" #include "care/detail/test_utils.h" GPU_TEST(CudaUmpireResource, gpu_initialization) { - printf("Initializing\n"); init_care_for_testing(); - printf("Initialized...
Testing care::array\n"); } GPU_TEST(CudaUmpireResource, DefaultConstructor) @@ -29,5 +26,46 @@ GPU_TEST(CudaUmpireResource, DefaultConstructor) care::CudaUmpireResource resource; } +GPU_TEST(CudaUmpireResource, AllocatorConstructor) +{ + auto& rm = umpire::ResourceManager::getInstance(); + + // Device allocator + auto deviceAllocator = rm.getAllocator("DEVICE_POOL"); // Initialized above + auto customDeviceAllocator = + rm.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_DEVICE_POOL", + deviceAllocator, + 64*1024*1024, + 16*1024*1024); + + // Pinned allocator + auto pinnedAllocator = rm.getAllocator("PINNED_POOL"); // Initialized above + auto customPinnedAllocator = + rm.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_PINNED_POOL", + pinnedAllocator, + 8*1024*1024, + 2*1024*1024); + + // Managed allocator + auto managedAllocator = rm.getAllocator("UM"); // Umpire default + + // Make a unified memory pool to draw from (not done in init_care_for_testing()) + auto managedPoolAllocator = + rm.makeAllocator<umpire::strategy::QuickPool>("UM_POOL", + managedAllocator, + 128*1024*1024, + 8*1024*1024); + + auto customManagedAllocator = + rm.makeAllocator<umpire::strategy::QuickPool>("CUSTOM_UM_POOL", + managedPoolAllocator, + 8*1024*1024, + 2*1024*1024); + + care::CudaUmpireResource resource(customDeviceAllocator, + customPinnedAllocator, + customManagedAllocator); +} + #endif // CARE_GPUCC From 867a3cf22ac3c70db083a78f7083cbcf8ab49045 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 19 Jul 2023 14:43:26 -0700 Subject: [PATCH 6/6] Add experimental branch in RAJA to support custom resources --- tpl/raja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/raja b/tpl/raja index 3774f513..d7bf64de 160000 --- a/tpl/raja +++ b/tpl/raja @@ -1 +1 @@ -Subproject commit 3774f51339459bbbdb77055aa23f82919b6335b6 +Subproject commit d7bf64def3ad8313eda53798f3e2c1479ac97545