From 5f435117b4d94dae0cb30299f525335ec41e7e27 Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Mon, 29 Apr 2024 23:09:23 +0200 Subject: [PATCH 1/8] added fix for conditional instantiation of cuda calculator. --- sphericart-torch/src/torch.cpp | 14 +++++++++++--- sphericart/include/sphericart_cuda.hpp | 6 ++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index aef6907d..15cecbcb 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -2,21 +2,29 @@ #include "sphericart/torch.hpp" #include +#include #include "sphericart/autograd.hpp" #include "sphericart/torch_cuda_wrapper.hpp" +using namespace torch; using namespace sphericart_torch; + SphericalHarmonics::SphericalHarmonics(int64_t l_max, bool normalized, bool backward_second_derivatives) : l_max_(l_max), normalized_(normalized), backward_second_derivatives_(backward_second_derivatives), - calculator_double_(l_max_, normalized_), calculator_float_(l_max_, normalized_), - - calculator_cuda_double_(l_max_, normalized_), calculator_cuda_float_(l_max_, normalized_) //, + calculator_double_(l_max_, normalized_), calculator_float_(l_max_, normalized_) { this->omp_num_threads_ = calculator_double_.get_omp_num_threads(); + + if (torch::cuda::is_available()) { + this->calculator_cuda_double_ = + sphericart::cuda::SphericalHarmonics(l_max_, normalized_); + this->calculator_cuda_float_ = + sphericart::cuda::SphericalHarmonics(l_max_, normalized_); + } } torch::Tensor SphericalHarmonics::compute(torch::Tensor xyz) { diff --git a/sphericart/include/sphericart_cuda.hpp b/sphericart/include/sphericart_cuda.hpp index 63d6d046..ff091666 100644 --- a/sphericart/include/sphericart_cuda.hpp +++ b/sphericart/include/sphericart_cuda.hpp @@ -41,6 +41,12 @@ template class SphericalHarmonics { */ SphericalHarmonics(size_t l_max, bool normalized = false); + /** Default constructor + * Required so sphericart_torch can conditionally instantiate this class + * depending on if cuda is available. + */ + SphericalHarmonics() = default; + /* @cond */ ~SphericalHarmonics(); /* @endcond */ From b8c66388af32bccb5025f5a668712c5aedb551e5 Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Mon, 29 Apr 2024 23:10:28 +0200 Subject: [PATCH 2/8] formatting. --- sphericart/include/sphericart_cuda.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphericart/include/sphericart_cuda.hpp b/sphericart/include/sphericart_cuda.hpp index ff091666..933acd13 100644 --- a/sphericart/include/sphericart_cuda.hpp +++ b/sphericart/include/sphericart_cuda.hpp @@ -42,7 +42,7 @@ template class SphericalHarmonics { SphericalHarmonics(size_t l_max, bool normalized = false); /** Default constructor - * Required so sphericart_torch can conditionally instantiate this class + * Required so sphericart_torch can conditionally instantiate this class * depending on if cuda is available. */ SphericalHarmonics() = default; From eae25874d0bbed08c272c9ce72319fe10305bc3e Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Mon, 29 Apr 2024 23:19:12 +0200 Subject: [PATCH 3/8] unecessary import. --- sphericart-torch/src/torch.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index 15cecbcb..cb7b19fe 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -2,7 +2,6 @@ #include "sphericart/torch.hpp" #include -#include #include "sphericart/autograd.hpp" #include "sphericart/torch_cuda_wrapper.hpp" @@ -13,9 +12,7 @@ using namespace sphericart_torch; SphericalHarmonics::SphericalHarmonics(int64_t l_max, bool normalized, bool backward_second_derivatives) : l_max_(l_max), normalized_(normalized), backward_second_derivatives_(backward_second_derivatives), - calculator_double_(l_max_, normalized_), calculator_float_(l_max_, normalized_) - { this->omp_num_threads_ = calculator_double_.get_omp_num_threads(); From 3bf757eea995d980a4ea2150723041bb67cb516d Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Fri, 3 May 2024 12:43:23 +0200 Subject: [PATCH 4/8] fixed pointer issues. --- sphericart-torch/include/sphericart/torch.hpp | 5 +- sphericart-torch/src/autograd.cpp | 4 +- sphericart-torch/src/torch.cpp | 22 +++-- sphericart/include/sphericart_cuda.hpp | 11 +-- sphericart/src/sphericart_cuda.cu | 88 +++++++++++-------- 5 files changed, 75 insertions(+), 55 deletions(-) diff --git a/sphericart-torch/include/sphericart/torch.hpp b/sphericart-torch/include/sphericart/torch.hpp index 6bc814e8..88f3bdb1 100644 --- a/sphericart-torch/include/sphericart/torch.hpp +++ b/sphericart-torch/include/sphericart/torch.hpp @@ -18,6 +18,7 @@ class SphericalHarmonics : public torch::CustomClassHolder { SphericalHarmonics( int64_t l_max, bool normalized = false, bool backward_second_derivatives = false ); + ~SphericalHarmonics(); // Actual calculation, with autograd support torch::Tensor compute(torch::Tensor xyz); @@ -45,8 +46,8 @@ class SphericalHarmonics : public torch::CustomClassHolder { sphericart::SphericalHarmonics calculator_float_; // CUDA implementation - sphericart::cuda::SphericalHarmonics calculator_cuda_double_; - sphericart::cuda::SphericalHarmonics calculator_cuda_float_; + sphericart::cuda::SphericalHarmonics* calculator_cuda_double_ptr = nullptr; + sphericart::cuda::SphericalHarmonics* calculator_cuda_float_ptr = nullptr; }; } // namespace sphericart_torch diff --git a/sphericart-torch/src/autograd.cpp b/sphericart-torch/src/autograd.cpp index 74498703..eb89e337 100644 --- a/sphericart-torch/src/autograd.cpp +++ b/sphericart-torch/src/autograd.cpp @@ -233,7 +233,7 @@ torch::autograd::variable_list SphericalHarmonicsAutograd::forward( } if (xyz.dtype() == c10::kDouble) { - calculator.calculator_cuda_double_.compute( + calculator.calculator_cuda_double_ptr->compute( xyz.data_ptr(), xyz.size(0), requires_grad, @@ -245,7 +245,7 @@ torch::autograd::variable_list SphericalHarmonicsAutograd::forward( ); } else if (xyz.dtype() == c10::kFloat) { - calculator.calculator_cuda_float_.compute( + calculator.calculator_cuda_float_ptr->compute( xyz.data_ptr(), xyz.size(0), requires_grad, diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index cb7b19fe..bcafd44e 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -8,19 +8,29 @@ using namespace torch; using namespace sphericart_torch; +using namespace std; SphericalHarmonics::SphericalHarmonics(int64_t l_max, bool normalized, bool backward_second_derivatives) : l_max_(l_max), normalized_(normalized), backward_second_derivatives_(backward_second_derivatives), - calculator_double_(l_max_, normalized_), calculator_float_(l_max_, normalized_) -{ + calculator_double_(l_max_, normalized_), calculator_float_(l_max_, normalized_) { this->omp_num_threads_ = calculator_double_.get_omp_num_threads(); if (torch::cuda::is_available()) { - this->calculator_cuda_double_ = - sphericart::cuda::SphericalHarmonics(l_max_, normalized_); - this->calculator_cuda_float_ = - sphericart::cuda::SphericalHarmonics(l_max_, normalized_); + this->calculator_cuda_double_ptr = + new sphericart::cuda::SphericalHarmonics(l_max_, normalized_); + this->calculator_cuda_float_ptr = + new sphericart::cuda::SphericalHarmonics(l_max_, normalized_); + } +} + +SphericalHarmonics::~SphericalHarmonics() { + if (this->calculator_cuda_double_ptr != nullptr) { + delete this->calculator_cuda_double_ptr; + } + + if (this->calculator_cuda_float_ptr != nullptr) { + delete this->calculator_cuda_float_ptr; } } diff --git a/sphericart/include/sphericart_cuda.hpp b/sphericart/include/sphericart_cuda.hpp index 933acd13..a9e47e9f 100644 --- a/sphericart/include/sphericart_cuda.hpp +++ b/sphericart/include/sphericart_cuda.hpp @@ -45,7 +45,6 @@ template class SphericalHarmonics { * Required so sphericart_torch can conditionally instantiate this class * depending on if cuda is available. */ - SphericalHarmonics() = default; /* @cond */ ~SphericalHarmonics(); @@ -101,14 +100,12 @@ template class SphericalHarmonics { private: size_t l_max; // maximum l value computed by this class size_t nprefactors; - bool normalized; // should we normalize the input vectors? - T* prefactors_cpu; // host prefactors buffer - T** prefactors_cuda; // storage space for prefactors - int device_count; // number of visible GPU devices - + bool normalized; // should we normalize the input vectors? + T* prefactors_cpu = nullptr; // host prefactors buffer + T** prefactors_cuda = nullptr; // storage space for prefactors + int device_count = 0; // number of visible GPU devices int64_t CUDA_GRID_DIM_X_ = 8; int64_t CUDA_GRID_DIM_Y_ = 8; - bool cached_compute_with_gradients = false; bool cached_compute_with_hessian = false; int64_t _current_shared_mem_allocation = 0; diff --git a/sphericart/src/sphericart_cuda.cu b/sphericart/src/sphericart_cuda.cu index d05b7a28..a894d7c9 100644 --- a/sphericart/src/sphericart_cuda.cu +++ b/sphericart/src/sphericart_cuda.cu @@ -30,66 +30,78 @@ template SphericalHarmonics::SphericalHarmonics(size_t l_max, bo buffer space, compute prefactors, and sets the function pointers that are used for the actual calls */ - this->l_max = (int)l_max; this->nprefactors = (int)(l_max + 1) * (l_max + 2); this->normalized = normalized; this->prefactors_cpu = new T[this->nprefactors]; + CUDA_CHECK(cudaGetDeviceCount(&this->device_count)); + // compute prefactors on host first compute_sph_prefactors((int)l_max, this->prefactors_cpu); - CUDA_CHECK(cudaGetDeviceCount(&this->device_count)); + if (this->device_count) { + int current_device; - int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); - CUDA_CHECK(cudaGetDevice(¤t_device)); + // allocate prefactorts on every visible device and copy from host + this->prefactors_cuda = new T*[this->device_count]; - // allocate prefactorts on every visible device and copy from host - this->prefactors_cuda = new T*[this->device_count]; + for (int device = 0; device < this->device_count; device++) { + CUDA_CHECK(cudaSetDevice(device)); + CUDA_CHECK( + cudaMalloc((void**)&this->prefactors_cuda[device], this->nprefactors * sizeof(T)) + ); + CUDA_CHECK(cudaMemcpy( + this->prefactors_cuda[device], + this->prefactors_cpu, + this->nprefactors * sizeof(T), + cudaMemcpyHostToDevice + )); + } - for (int device = 0; device < this->device_count; device++) { - CUDA_CHECK(cudaSetDevice(device)); - CUDA_CHECK(cudaMalloc((void**)&this->prefactors_cuda[device], this->nprefactors * sizeof(T)) + // initialise the currently available amount of shared memory on all visible devices + this->_current_shared_mem_allocation = adjust_shared_memory( + sizeof(T), + this->l_max, + this->CUDA_GRID_DIM_X_, + this->CUDA_GRID_DIM_Y_, + false, + false, + this->_current_shared_mem_allocation ); - CUDA_CHECK(cudaMemcpy( - this->prefactors_cuda[device], - this->prefactors_cpu, - this->nprefactors * sizeof(T), - cudaMemcpyHostToDevice - )); - } - // initialise the currently available amount of shared memory on all visible devices - this->_current_shared_mem_allocation = adjust_shared_memory( - sizeof(T), - this->l_max, - this->CUDA_GRID_DIM_X_, - this->CUDA_GRID_DIM_Y_, - false, - false, - this->_current_shared_mem_allocation - ); - - // set the context back to the current device - CUDA_CHECK(cudaSetDevice(current_device)); + // set the context back to the current device + CUDA_CHECK(cudaSetDevice(current_device)); + } } template SphericalHarmonics::~SphericalHarmonics() { // Destructor, frees the prefactors - delete[] (this->prefactors_cpu); + if (this->prefactors_cpu != nullptr) { + delete[] (this->prefactors_cpu); + this->prefactors_cpu = nullptr; + } - int current_device; + if (this->device_count > 0) { - CUDA_CHECK(cudaGetDevice(¤t_device)); + int current_device; - for (int device = 0; device < this->device_count; device++) { - CUDA_CHECK(cudaSetDevice(device)); - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaFree(this->prefactors_cuda[device])); - } + CUDA_CHECK(cudaGetDevice(¤t_device)); - CUDA_CHECK(cudaSetDevice(current_device)); + for (int device = 0; device < this->device_count; device++) { + CUDA_CHECK(cudaSetDevice(device)); + CUDA_CHECK(cudaDeviceSynchronize()); + if (this->prefactors_cuda != nullptr && this->prefactors_cuda[device] != nullptr) { + CUDA_CHECK(cudaFree(this->prefactors_cuda[device])); + this->prefactors_cuda[device] = nullptr; + } + } + this->prefactors_cuda = nullptr; + + CUDA_CHECK(cudaSetDevice(current_device)); + } } template From 9e3c566abec654cdb1d9db936be361278967f488 Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Fri, 3 May 2024 12:58:09 +0200 Subject: [PATCH 5/8] changed to unique_ptr --- sphericart-torch/include/sphericart/torch.hpp | 5 ++--- sphericart-torch/src/torch.cpp | 15 +++------------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sphericart-torch/include/sphericart/torch.hpp b/sphericart-torch/include/sphericart/torch.hpp index 88f3bdb1..21df16ae 100644 --- a/sphericart-torch/include/sphericart/torch.hpp +++ b/sphericart-torch/include/sphericart/torch.hpp @@ -18,7 +18,6 @@ class SphericalHarmonics : public torch::CustomClassHolder { SphericalHarmonics( int64_t l_max, bool normalized = false, bool backward_second_derivatives = false ); - ~SphericalHarmonics(); // Actual calculation, with autograd support torch::Tensor compute(torch::Tensor xyz); @@ -46,8 +45,8 @@ class SphericalHarmonics : public torch::CustomClassHolder { sphericart::SphericalHarmonics calculator_float_; // CUDA implementation - sphericart::cuda::SphericalHarmonics* calculator_cuda_double_ptr = nullptr; - sphericart::cuda::SphericalHarmonics* calculator_cuda_float_ptr = nullptr; + std::unique_ptr> calculator_cuda_double_ptr; + std::unique_ptr> calculator_cuda_float_ptr; }; } // namespace sphericart_torch diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index bcafd44e..bc7138ca 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -18,19 +18,10 @@ SphericalHarmonics::SphericalHarmonics(int64_t l_max, bool normalized, bool back if (torch::cuda::is_available()) { this->calculator_cuda_double_ptr = - new sphericart::cuda::SphericalHarmonics(l_max_, normalized_); - this->calculator_cuda_float_ptr = - new sphericart::cuda::SphericalHarmonics(l_max_, normalized_); - } -} + std::make_unique>(l_max_, normalized_); -SphericalHarmonics::~SphericalHarmonics() { - if (this->calculator_cuda_double_ptr != nullptr) { - delete this->calculator_cuda_double_ptr; - } - - if (this->calculator_cuda_float_ptr != nullptr) { - delete this->calculator_cuda_float_ptr; + this->calculator_cuda_float_ptr = + std::make_unique>(l_max_, normalized_); } } From 5336ded1e5f311531f2e04bfad3acfe671e9774c Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Fri, 12 Jul 2024 08:38:39 +0200 Subject: [PATCH 6/8] small commit to retrgger CI/CD --- sphericart/src/cuda_base.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/sphericart/src/cuda_base.cu b/sphericart/src/cuda_base.cu index 1c56186c..1e70c19d 100644 --- a/sphericart/src/cuda_base.cu +++ b/sphericart/src/cuda_base.cu @@ -900,7 +900,6 @@ void sphericart::cuda::spherical_harmonics_backward_cuda_base( scalar_t* __restrict__ xyz_grad, void* cuda_stream ) { - dim3 grid_dim(4, 32); auto find_num_blocks = [](int x, int bdim) { return (x + bdim - 1) / bdim; }; From 55b588fc0f09cef40e9b6445b10e537e681a984c Mon Sep 17 00:00:00 2001 From: "Nick J. Browning" Date: Fri, 12 Jul 2024 08:43:07 +0200 Subject: [PATCH 7/8] retrigger --- sphericart-torch/src/torch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index 242e9306..e4090727 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -1,5 +1,6 @@ #include +#include #include "sphericart/torch.hpp" #include "sphericart/autograd.hpp" From cdc69a9d9812bbbaf96a6d50293784e2f417588f Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Tue, 16 Jul 2024 21:37:40 +0200 Subject: [PATCH 8/8] Include standard torch header --- sphericart-torch/include/sphericart/torch.hpp | 2 +- sphericart-torch/src/torch.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sphericart-torch/include/sphericart/torch.hpp b/sphericart-torch/include/sphericart/torch.hpp index 21df16ae..e4fb51c0 100644 --- a/sphericart-torch/include/sphericart/torch.hpp +++ b/sphericart-torch/include/sphericart/torch.hpp @@ -1,7 +1,7 @@ #ifndef SPHERICART_TORCH_HPP #define SPHERICART_TORCH_HPP -#include +#include #include diff --git a/sphericart-torch/src/torch.cpp b/sphericart-torch/src/torch.cpp index e4090727..ba832544 100644 --- a/sphericart-torch/src/torch.cpp +++ b/sphericart-torch/src/torch.cpp @@ -1,5 +1,3 @@ - -#include #include #include "sphericart/torch.hpp"