diff --git a/source/source_hsolver/CMakeLists.txt b/source/source_hsolver/CMakeLists.txt index c2aa8c60e4..568510b603 100644 --- a/source/source_hsolver/CMakeLists.txt +++ b/source/source_hsolver/CMakeLists.txt @@ -21,6 +21,7 @@ if(ENABLE_LCAO) hsolver_lcao.cpp diago_scalapack.cpp parallel_k2d.cpp + diago_lapack.cpp ) else () list(APPEND objects diff --git a/source/source_hsolver/diago_lapack.cpp b/source/source_hsolver/diago_lapack.cpp index 90018c288d..a0271da244 100644 --- a/source/source_hsolver/diago_lapack.cpp +++ b/source/source_hsolver/diago_lapack.cpp @@ -30,12 +30,10 @@ void DiagoLapack::diag(hamilt::Hamilt* phm_in, psi::Psi& // Diag this->dsygvx_diag(h_mat.col, h_mat.row, h_mat.p, s_mat.p, eigen.data(), psi); // Copy result - int size = eigen.size(); - for (int i = 0; i < size; i++) - { - eigenvalue_in[i] = eigen[i]; - } + const int inc = 1; + BlasConnector::copy(PARAM.inp.nbands, eigen.data(), inc, eigenvalue_in, inc); } + template <> void DiagoLapack>::diag(hamilt::Hamilt>* phm_in, psi::Psi>& psi, @@ -48,109 +46,152 @@ void DiagoLapack>::diag(hamilt::Hamilt std::vector eigen(PARAM.globalv.nlocal, 0.0); this->zhegvx_diag(h_mat.col, h_mat.row, h_mat.p, s_mat.p, eigen.data(), psi); - int size = eigen.size(); - for (int i = 0; i < size; i++) - { - eigenvalue_in[i] = eigen[i]; - } + const int inc = 1; + BlasConnector::copy(PARAM.inp.nbands, eigen.data(), inc, eigenvalue_in, inc); +} + +#ifdef __MPI + template<> + void DiagoLapack::diag_pool(hamilt::MatrixBlock& h_mat, + hamilt::MatrixBlock& s_mat, + psi::Psi& psi, + Real* eigenvalue_in, + MPI_Comm& comm) +{ + ModuleBase::TITLE("DiagoScalapack", "diag_pool"); + assert(h_mat.col == s_mat.col && h_mat.row == s_mat.row && h_mat.desc == s_mat.desc); + std::vector eigen(PARAM.globalv.nlocal, 0.0); + this->dsygvx_diag(h_mat.col, h_mat.row, h_mat.p, s_mat.p, eigen.data(), psi); + const int inc = 1; + BlasConnector::copy(PARAM.inp.nbands, eigen.data(), inc, eigenvalue_in, inc); +} + template<> + void DiagoLapack>::diag_pool(hamilt::MatrixBlock>& h_mat, + hamilt::MatrixBlock>& s_mat, + psi::Psi>& psi, + Real* eigenvalue_in, + MPI_Comm& comm) +{ + ModuleBase::TITLE("DiagoScalapack", "diag_pool"); + assert(h_mat.col == s_mat.col && h_mat.row == s_mat.row && h_mat.desc == s_mat.desc); + std::vector eigen(PARAM.globalv.nlocal, 0.0); + this->zhegvx_diag(h_mat.col, h_mat.row, h_mat.p, s_mat.p, eigen.data(), psi); + const int inc = 1; + BlasConnector::copy(PARAM.inp.nbands, eigen.data(), inc, eigenvalue_in, inc); } +#endif template -int DiagoLapack::dsygvx_once(const int ncol, +std::pair> DiagoLapack::dsygvx_once(const int ncol, const int nrow, const double* const h_mat, const double* const s_mat, double* const ekb, psi::Psi& wfc_2d) const { - // Copy matrix to temp variables ModuleBase::matrix h_tmp(ncol, nrow, false); memcpy(h_tmp.c, h_mat, sizeof(double) * ncol * nrow); - - ModuleBase::matrix s_tmp(ncol, nrow, false); memcpy(s_tmp.c, s_mat, sizeof(double) * ncol * nrow); - // Prepare caculate parameters const char jobz = 'V', range = 'I', uplo = 'U'; const int itype = 1, il = 1, iu = PARAM.inp.nbands, one = 1; - int M = 0, info = 0; + int M = 0, NZ = 0, lwork = -1, liwork = -1, info = 0; double vl = 0, vu = 0; - const double abstol = 0; - - int lwork = (ncol + 2) * ncol; - + const double abstol = 0, orfac = -1; std::vector work(3, 0); std::vector iwork(1, 0); std::vector ifail(PARAM.globalv.nlocal, 0); - - // Original Lapack caculate, obelsete - /*dsygvx_(&itype, - &jobz, - &range, - &uplo, - &PARAM.globalv.nlocal, - h_tmp.c, - &ncol, - s_tmp.c, - &ncol, - &vl, - &vu, - &il, - &iu, - &abstol, - &M, - ekb, - wfc_2d.get_pointer(), - &ncol, - work.data(), - &lwork, - iwork.data(), - ifail.data(), - &info); - - // Throw error if it returns info - if (info) + std::vector iclustr(2 * GlobalV::DSIZE); + std::vector gap(GlobalV::DSIZE); + + // LAPACK dsygvx signature: + // (ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, VL, VU, IL, IU, + // ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO) + int n = PARAM.globalv.nlocal; + int lda = n, ldb = n, ldz = n; + dsygvx_(&itype, + &jobz, + &range, + &uplo, + &n, + h_tmp.c, + &lda, + s_tmp.c, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + ekb, + wfc_2d.get_pointer(), + &ldz, + work.data(), + &lwork, + iwork.data(), + ifail.data(), + &info); + if (info) { throw std::runtime_error("info = " + ModuleBase::GlobalFunc::TO_STRING(info) + ".\n" + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - //lwork = work[0]; - //work.resize(std::max(lwork, 3), 0); - //iwork.resize(iwork[0], 0); - - dsygvx_(&itype, - &jobz, - &range, - &uplo, - &PARAM.globalv.nlocal, - h_tmp.c, - &PARAM.globalv.nlocal, - s_tmp.c, - &PARAM.globalv.nlocal, - &vl, - &vu, - &il, - &iu, - &abstol, - &M, - ekb, - wfc_2d.get_pointer(), - &ncol, - work.data(), - &lwork, - iwork.data(), - ifail.data(), - &info);*/ - - double *ev = new double[ncol * ncol]; + } - dsygv_(&itype, &jobz, &uplo, &PARAM.globalv.nlocal, h_tmp.c, &ncol, s_tmp.c, &ncol, ekb, ev, &lwork, &info); + // Query returned optimal lwork in work[0] + lwork = static_cast(work[0]); + work.resize(std::max(lwork, 3), 0); + // LAPACK integer workspace: use conservative size (5*N) + liwork = std::max(1, 5 * n); + iwork.resize(liwork, 0); - return info; + dsygvx_(&itype, + &jobz, + &range, + &uplo, + &n, + h_tmp.c, + &lda, + s_tmp.c, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + ekb, + wfc_2d.get_pointer(), + &ldz, + work.data(), + &lwork, + iwork.data(), + ifail.data(), + &info); + // GlobalV::ofs_running<<"M="<{}); + } else if (info < 0) { + return std::make_pair(info, std::vector{}); + } else if (info % 2) { + return std::make_pair(info, ifail); + } else if (info / 2 % 2) { + return std::make_pair(info, iclustr); + } else if (info / 4 % 2) { + return std::make_pair(info, std::vector{M, NZ}); + } else if (info / 16 % 2) { + return std::make_pair(info, ifail); + } else { + throw std::runtime_error("info = " + ModuleBase::GlobalFunc::TO_STRING(info) + ".\n" + + std::string(__FILE__) + " line " + + std::to_string(__LINE__)); + } } template -int DiagoLapack::zhegvx_once(const int ncol, +std::pair> DiagoLapack::zhegvx_once(const int ncol, const int nrow, const std::complex* const h_mat, const std::complex* const s_mat, @@ -159,95 +200,112 @@ int DiagoLapack::zhegvx_once(const int ncol, { ModuleBase::ComplexMatrix h_tmp(ncol, nrow, false); memcpy(h_tmp.c, h_mat, sizeof(std::complex) * ncol * nrow); - ModuleBase::ComplexMatrix s_tmp(ncol, nrow, false); memcpy(s_tmp.c, s_mat, sizeof(std::complex) * ncol * nrow); const char jobz = 'V', range = 'I', uplo = 'U'; const int itype = 1, il = 1, iu = PARAM.inp.nbands, one = 1; - int M = 0, lrwork = -1, info = 0; - const double abstol = 0; - - int lwork = (ncol + 2) * ncol; - + int M = 0, NZ = 0, lwork = -1, lrwork = -1, liwork = -1, info = 0; + const double abstol = 0, orfac = -1; + //Note: pzhegvx_ has a bug + // We must give vl,vu a value, although we do not use range 'V' + // We must give rwork at least a memory of sizeof(double) * 3 const double vl = 0, vu = 0; std::vector> work(1, 0); - double *rwork = new double[3 * ncol - 2]; + std::vector rwork(3, 0); std::vector iwork(1, 0); std::vector ifail(PARAM.globalv.nlocal, 0); - - // Original Lapack caculate, obelsete - /* + std::vector iclustr(2 * GlobalV::DSIZE); + std::vector gap(GlobalV::DSIZE); + + // LAPACK zhegvx signature: + // (ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, VL, VU, IL, IU, + // ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL, INFO) + int n = PARAM.globalv.nlocal; + int lda = n, ldb = n, ldz = n; zhegvx_(&itype, - &jobz, - &range, - &uplo, - &PARAM.globalv.nlocal, - h_tmp.c, - &PARAM.globalv.nlocal, - s_tmp.c, - &PARAM.globalv.nlocal, - &vl, - &vu, - &il, - &iu, - &abstol, - &M, - ekb, - wfc_2d.get_pointer(), - &ncol, - work.data(), - &lwork, - rwork.data(), - iwork.data(), - ifail.data(), - &info); - - if (info) + &jobz, + &range, + &uplo, + &n, + h_tmp.c, + &lda, + s_tmp.c, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + ekb, + wfc_2d.get_pointer(), + &ldz, + work.data(), + &lwork, + rwork.data(), + iwork.data(), + ifail.data(), + &info); + if (info) { throw std::runtime_error("info=" + ModuleBase::GlobalFunc::TO_STRING(info) + ". " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } - // GlobalV::ofs_running<<"lwork="<(work[0].real()); + work.resize(std::max(lwork, 1), 0); + // rwork: choose conservative size. Use baseline 7*N plus degeneracy margin + lrwork = std::max(3, 7 * n + this->degeneracy_max * n); + rwork.resize(lrwork, 0); + // LAPACK integer workspace: use conservative size (5*N) + liwork = std::max(1, 5 * n); + iwork.resize(liwork, 0); zhegvx_(&itype, - &jobz, - &range, - &uplo, - &PARAM.globalv.nlocal, - h_tmp.c, - &PARAM.globalv.nlocal, - s_tmp.c, - &PARAM.globalv.nlocal, - &vl, - &vu, - &il, - &iu, - &abstol, - &M, - ekb, - wfc_2d.get_pointer(), - &ncol, - work.data(), - &lwork, - rwork.data(), - iwork.data(), - ifail.data(), - &info); - - */ - - std::complex *ev = new std::complex[ncol * ncol]; - - zhegv_(&itype, &jobz, &uplo, &PARAM.globalv.nlocal, h_tmp.c, &ncol, s_tmp.c, &ncol, ekb, ev, &lwork, rwork, &info); - - return info; + &jobz, + &range, + &uplo, + &n, + h_tmp.c, + &lda, + s_tmp.c, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + ekb, + wfc_2d.get_pointer(), + &ldz, + work.data(), + &lwork, + rwork.data(), + iwork.data(), + ifail.data(), + &info); + // GlobalV::ofs_running<<"M="<{}); + } else if (info < 0) { + return std::make_pair(info, std::vector{}); + } else if (info % 2) { + return std::make_pair(info, ifail); + } else if (info / 2 % 2) { + return std::make_pair(info, iclustr); + } else if (info / 4 % 2) { + return std::make_pair(info, std::vector{M, NZ}); + } else if (info / 16 % 2) { + return std::make_pair(info, ifail); + } else { + throw std::runtime_error("info = " + ModuleBase::GlobalFunc::TO_STRING(info) + ".\n" + + std::string(__FILE__) + " line " + + std::to_string(__LINE__)); + } } template @@ -260,9 +318,9 @@ void DiagoLapack::dsygvx_diag(const int ncol, { while (true) { - - int info_result = dsygvx_once(ncol, nrow, h_mat, s_mat, ekb, wfc_2d); - if (info_result == 0) { + const std::pair> info_vec = dsygvx_once(ncol, nrow, h_mat, s_mat, ekb, wfc_2d); + post_processing(info_vec.first, info_vec.second); + if (info_vec.first == 0) { break; } } @@ -278,8 +336,9 @@ void DiagoLapack::zhegvx_diag(const int ncol, { while (true) { - int info_result = zhegvx_once(ncol, nrow, h_mat, s_mat, ekb, wfc_2d); - if (info_result == 0) { + const std::pair> info_vec = zhegvx_once(ncol, nrow, h_mat, s_mat, ekb, wfc_2d); + post_processing(info_vec.first, info_vec.second); + if (info_vec.first == 0) { break; } } @@ -297,5 +356,60 @@ void DiagoLapack::post_processing(const int info, const std::vector& vec { return; } + else if (info < 0) + { + const int info_negative = -info; + const std::string str_index + = (info_negative > 100) + ? ModuleBase::GlobalFunc::TO_STRING(info_negative / 100) + "-th argument " + + ModuleBase::GlobalFunc::TO_STRING(info_negative % 100) + "-entry is illegal.\n" + : ModuleBase::GlobalFunc::TO_STRING(info_negative) + "-th argument is illegal.\n"; + throw std::runtime_error(str_info_FILE + str_index); + } + else if (info % 2) + { + std::string str_ifail = "ifail = "; + for (const int i: vec) { + str_ifail += ModuleBase::GlobalFunc::TO_STRING(i) + " "; + } + throw std::runtime_error(str_info_FILE + str_ifail); + } + else if (info / 2 % 2) + { + int degeneracy_need = 0; + for (int irank = 0; irank < GlobalV::DSIZE; ++irank) { + degeneracy_need = std::max(degeneracy_need, vec[2 * irank + 1] - vec[2 * irank]); + } + const std::string str_need = "degeneracy_need = " + ModuleBase::GlobalFunc::TO_STRING(degeneracy_need) + ".\n"; + const std::string str_saved + = "degeneracy_saved = " + ModuleBase::GlobalFunc::TO_STRING(this->degeneracy_max) + ".\n"; + if (degeneracy_need <= this->degeneracy_max) + { + throw std::runtime_error(str_info_FILE + str_need + str_saved); + } + else + { + GlobalV::ofs_running << str_need << str_saved; + this->degeneracy_max = degeneracy_need; + return; + } + } + else if (info / 4 % 2) + { + const std::string str_M = "M = " + ModuleBase::GlobalFunc::TO_STRING(vec[0]) + ".\n"; + const std::string str_NZ = "NZ = " + ModuleBase::GlobalFunc::TO_STRING(vec[1]) + ".\n"; + const std::string str_NBANDS + = "PARAM.inp.nbands = " + ModuleBase::GlobalFunc::TO_STRING(PARAM.inp.nbands) + ".\n"; + throw std::runtime_error(str_info_FILE + str_M + str_NZ + str_NBANDS); + } + else if (info / 16 % 2) + { + const std::string str_npos = "not positive definite = " + ModuleBase::GlobalFunc::TO_STRING(vec[0]) + ".\n"; + throw std::runtime_error(str_info_FILE + str_npos); + } + else + { + throw std::runtime_error(str_info_FILE); + } } } // namespace hsolver \ No newline at end of file diff --git a/source/source_hsolver/diago_lapack.h b/source/source_hsolver/diago_lapack.h index 53b710ae63..bfdf78ac34 100644 --- a/source/source_hsolver/diago_lapack.h +++ b/source/source_hsolver/diago_lapack.h @@ -27,6 +27,10 @@ class DiagoLapack public: void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in); + #ifdef __MPI + // diagnolization used in parallel-k case + void diag_pool(hamilt::MatrixBlock& h_mat, hamilt::MatrixBlock& s_mat, psi::Psi& psi, Real* eigenvalue_in, MPI_Comm& comm); +#endif void dsygvx_diag(const int ncol, const int nrow, @@ -41,18 +45,18 @@ class DiagoLapack double* const ekb, psi::Psi>& wfc_2d); - int dsygvx_once(const int ncol, - const int nrow, - const double* const h_mat, - const double* const s_mat, - double* const ekb, - psi::Psi& wfc_2d) const; - int zhegvx_once(const int ncol, - const int nrow, - const std::complex* const h_mat, - const std::complex* const s_mat, - double* const ekb, - psi::Psi>& wfc_2d) const; + std::pair> dsygvx_once(const int ncol, + const int nrow, + const double* const h_mat, + const double* const s_mat, + double* const ekb, + psi::Psi& wfc_2d) const; + std::pair> zhegvx_once(const int ncol, + const int nrow, + const std::complex* const h_mat, + const std::complex* const s_mat, + double* const ekb, + psi::Psi>& wfc_2d) const; int degeneracy_max = 12; // For reorthogonalized memory. 12 followes siesta. diff --git a/source/source_hsolver/hsolver_lcao.cpp b/source/source_hsolver/hsolver_lcao.cpp index 1c4c5a5d61..413c86777a 100644 --- a/source/source_hsolver/hsolver_lcao.cpp +++ b/source/source_hsolver/hsolver_lcao.cpp @@ -3,10 +3,10 @@ #ifdef __MPI #include "diago_scalapack.h" #include "source_base/module_external/scalapack_connector.h" -#else -#include "diago_lapack.h" #endif +#include "diago_lapack.h" + #ifdef __CUSOLVERMP #include "diago_cusolvermp.h" #endif @@ -174,13 +174,11 @@ void HSolverLCAO::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::Psi& } #endif #endif -#ifndef __MPI else if (this->method == "lapack") // only for single core { DiagoLapack la; la.diag(hm, psi, eigenvalue); } -#endif else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method is not supported for lcao basis in ABACUS!"); diff --git a/source/source_hsolver/test/CMakeLists.txt b/source/source_hsolver/test/CMakeLists.txt index 72ad05e0ba..217f8251b3 100644 --- a/source/source_hsolver/test/CMakeLists.txt +++ b/source/source_hsolver/test/CMakeLists.txt @@ -92,13 +92,13 @@ if (ENABLE_MPI) AddTest( TARGET MODULE_HSOLVER_LCAO LIBS parameter ${math_libs} ELPA::ELPA base genelpa psi device - SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_scalapack.cpp + SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_scalapack.cpp ../diago_lapack.cpp ) else() AddTest( TARGET MODULE_HSOLVER_LCAO LIBS parameter ${math_libs} base psi device - SOURCES diago_lcao_test.cpp ../diago_scalapack.cpp + SOURCES diago_lcao_test.cpp ../diago_scalapack.cpp ../diago_lapack.cpp ) endif() @@ -119,14 +119,6 @@ if (ENABLE_MPI) ../kernels/cuda/diag_cusolver.cu ) endif() -else() - if(ENABLE_LCAO) - AddTest( - TARGET MODULE_HSOLVER_Lapack - LIBS parameter ${math_libs} base psi device - SOURCES diago_lapack_test.cpp ../diago_lapack.cpp - ) - endif() endif() install(FILES H-KPoints-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES H-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/source/source_hsolver/test/diago_lcao_test.cpp b/source/source_hsolver/test/diago_lcao_test.cpp index bbfaad4a1e..8952a9d7c6 100644 --- a/source/source_hsolver/test/diago_lcao_test.cpp +++ b/source/source_hsolver/test/diago_lcao_test.cpp @@ -1,4 +1,5 @@ #include "source_hsolver/diago_scalapack.h" +#include "source_hsolver/diago_lapack.h" #include "source_hsolver/test/diago_elpa_utils.h" #define private public #include "source_io/module_parameter/parameter.h" @@ -74,6 +75,8 @@ class DiagoPrepare if (ks_solver == "scalapack_gvx") ; // dh = new hsolver::DiagoScalapack; + else if (ks_solver == "lapack") + ; #ifdef __ELPA else if (ks_solver == "genelpa") ; @@ -226,6 +229,11 @@ class DiagoPrepare hsolver::DiagoScalapack dh; dh.diag(&hmtest, psi, e_solver.data()); } + else if (ks_solver == "lapack") + { + hsolver::DiagoLapack la; + la.diag(&hmtest, psi, e_solver.data()); + } #ifdef __ELPA else if (ks_solver == "genelpa") { @@ -316,7 +324,9 @@ INSTANTIATE_TEST_SUITE_P( DiagoPrepare(0, 0, 32, 0, "genelpa", "H-GammaOnly-Si64.dat", "S-GammaOnly-Si64.dat"), #endif DiagoPrepare(0, 0, 1, 0, "scalapack_gvx", "H-GammaOnly-Si2.dat", "S-GammaOnly-Si2.dat"), - DiagoPrepare(0, 0, 32, 0, "scalapack_gvx", "H-GammaOnly-Si64.dat", "S-GammaOnly-Si64.dat"))); + DiagoPrepare(0, 0, 32, 0, "scalapack_gvx", "H-GammaOnly-Si64.dat", "S-GammaOnly-Si64.dat"), + DiagoPrepare(0, 0, 1, 0, "lapack", "H-GammaOnly-Si2.dat", "S-GammaOnly-Si2.dat"), + DiagoPrepare(0, 0, 32, 0, "lapack", "H-GammaOnly-Si64.dat", "S-GammaOnly-Si64.dat"))); class DiagoKPointsTest : public ::testing::TestWithParam>> {