From 03fda28a43930b5c5df05b90a934cdbc8b57d427 Mon Sep 17 00:00:00 2001 From: Ajay Panyala Date: Sun, 27 Oct 2024 15:16:15 -0700 Subject: [PATCH] Add output_dir option. Cholesky on sub-group. Fix clang warnings. Minor improvements to CC tilesize logic. --- docs/schema/input_schema.json | 5 +- docs/user_guide/basic_options.rst | 2 + exachem/cc/ccsd/cd_ccsd_cs_ann.cpp | 7 +- exachem/cc/ccsd/cd_ccsd_os_ann.cpp | 19 +- exachem/cc/ccsd_t/ccsd_t.cpp | 4 +- .../cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp | 24 +- .../cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp | 24 +- .../cc/ccsd_t/ccsd_t_all_fused_singles.hpp | 22 +- exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp | 4 +- exachem/cc/gfcc/gfccsd_driver.cpp | 8 +- exachem/cc/scripts/ccsd_advisor.py | 4 +- exachem/cholesky/cholesky_2e.cpp | 705 ++++++++++-------- exachem/cholesky/cholesky_2e.hpp | 2 +- exachem/common/atom_info.hpp | 2 +- exachem/common/chemenv.cpp | 3 +- exachem/common/cutils.cpp | 5 +- exachem/common/cutils.hpp | 5 +- exachem/common/ecatom.cpp | 2 +- exachem/common/options/input_options.cpp | 1 + exachem/common/options/input_options.hpp | 1 + exachem/common/options/parse_ccsd_options.cpp | 1 + exachem/common/options/parse_ccsd_options.hpp | 2 +- exachem/common/options/parse_cd_options.hpp | 2 +- .../common/options/parse_common_options.cpp | 1 + exachem/common/options/parse_fci_options.cpp | 1 + exachem/common/options/parse_fci_options.hpp | 2 +- exachem/common/options/parse_gw_options.cpp | 2 + exachem/common/options/parse_gw_options.hpp | 2 +- exachem/common/options/parse_options.cpp | 5 +- exachem/common/options/parse_options.hpp | 2 +- exachem/common/options/parse_scf_options.cpp | 6 + exachem/common/options/parse_task_options.cpp | 1 + exachem/common/options/parse_task_options.hpp | 2 +- exachem/common/txt_utils.cpp | 2 +- exachem/scf/scf_compute.cpp | 10 +- exachem/scf/scf_hartree_fock.cpp | 6 +- exachem/scf/scf_hartree_fock.hpp | 1 - exachem/task/ec_task.cpp | 4 +- exachem/task/ec_task.hpp | 4 +- inputs/example.json 
| 1 + inputs/guanine_cytosine_3bp.json | 1 - inputs/h2.json | 2 +- inputs/ozone.json | 10 +- methods/ExaChem.cpp | 13 +- 44 files changed, 521 insertions(+), 411 deletions(-) diff --git a/docs/schema/input_schema.json b/docs/schema/input_schema.json index 70101a9..e3982e1 100644 --- a/docs/schema/input_schema.json +++ b/docs/schema/input_schema.json @@ -47,9 +47,12 @@ "debug": { "type": "boolean" }, - "output_file_prefix": { + "file_prefix": { "type": "string" }, + "output_dir": { + "type": "string" + }, "comments": { "type": "object", "properties": { diff --git a/docs/user_guide/basic_options.rst b/docs/user_guide/basic_options.rst index 3e6dbc7..12a3c95 100644 --- a/docs/user_guide/basic_options.rst +++ b/docs/user_guide/basic_options.rst @@ -52,6 +52,8 @@ Some common options supported are as follows: :file_prefix: A string indicating the prefix for the name of the workspace folder where the results of a run are stored. It also forms the prefix for the files written to the workspace folder. The *default prefix* is the name of the input file without the *.json* extension. +:output_dir: A string indicating the path to the directory where the results of a run are stored. If not provided, the default is the current working directory. + .. 
_Basis: Basis set options diff --git a/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp b/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp index 95c2a76..7e8fcaf 100644 --- a/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp +++ b/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp @@ -771,7 +771,7 @@ std::tuple exachem::cc::ccsd::cd_ccsd_cs_driver( iteration_print(chem_env, ec.pg(), iter, residual, energy, iter_time); - if(writet && (((iter + 1) % writet_iter == 0) || (residual < thresh))) { + if(writet && ((iter + 1) % writet_iter == 0)) { write_to_disk(t1_aa, t1file); write_to_disk(t2_abab, t2file); } @@ -792,6 +792,11 @@ std::tuple exachem::cc::ccsd::cd_ccsd_cs_driver( diis(ec, rs, ts, next_t); } + if(writet) { + write_to_disk(t1_aa, t1file); + write_to_disk(t2_abab, t2file); + } + if(profile && ec.print()) { std::string profile_csv = ccsd_fp + "_profile.csv"; std::ofstream pds(profile_csv, std::ios::out); diff --git a/exachem/cc/ccsd/cd_ccsd_os_ann.cpp b/exachem/cc/ccsd/cd_ccsd_os_ann.cpp index 270ee38..d39dd4a 100644 --- a/exachem/cc/ccsd/cd_ccsd_os_ann.cpp +++ b/exachem/cc/ccsd/cd_ccsd_os_ann.cpp @@ -1086,7 +1086,7 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace iteration_print(chem_env, ec.pg(), iter, residual, energy, iter_time); - if(writet && (((iter + 1) % writet_iter == 0) /*|| (residual < thresh)*/)) { + if(writet && ((iter + 1) % writet_iter == 0)) { write_to_disk(d_t1, t1file); write_to_disk(d_t2, t2file); } @@ -1102,14 +1102,6 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace .deallocate(t2_copy) .execute(); // clang-format on - if(writet) { - write_to_disk(d_t1, t1file); - write_to_disk(d_t2, t2file); - if(computeTData && chem_env.ioptions.ccsd_options.writev) { - fs::copy_file(t1file, ccsd_fp + ".fullT1amp", fs::copy_options::update_existing); - fs::copy_file(t2file, ccsd_fp + ".fullT2amp", fs::copy_options::update_existing); - } - } break; } } @@ -1129,6 +1121,15 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const 
TiledIndexSpace diis(ec, rs, ts, next_t); } + if(writet) { + write_to_disk(d_t1, t1file); + write_to_disk(d_t2, t2file); + if(computeTData && chem_env.ioptions.ccsd_options.writev) { + fs::copy_file(t1file, ccsd_fp + ".fullT1amp", fs::copy_options::update_existing); + fs::copy_file(t2file, ccsd_fp + ".fullT2amp", fs::copy_options::update_existing); + } + } + if(profile && ec.print()) { std::string profile_csv = ccsd_fp + "_profile.csv"; std::ofstream pds(profile_csv, std::ios::out); diff --git a/exachem/cc/ccsd_t/ccsd_t.cpp b/exachem/cc/ccsd_t/ccsd_t.cpp index bd3664c..8a8d86a 100644 --- a/exachem/cc/ccsd_t/ccsd_t.cpp +++ b/exachem/cc/ccsd_t/ccsd_t.cpp @@ -59,7 +59,7 @@ void exachem::cc::ccsd_t::ccsd_t_driver(ExecutionContext& ec, ChemEnv& chem_env) int ga_cnn = ec.nnodes(); if(nsranks > ga_cnn) nsranks = ga_cnn; nsranks = nsranks * GA_Cluster_nprocs(0); - int subranks[nsranks]; + std::vector subranks(nsranks); for(int i = 0; i < nsranks; i++) subranks[i] = i; #if defined(USE_UPCXX) @@ -69,7 +69,7 @@ void exachem::cc::ccsd_t::ccsd_t_driver(ExecutionContext& ec, ChemEnv& chem_env) MPI_Group world_group; MPI_Comm_group(world_comm, &world_group); MPI_Group subgroup; - MPI_Group_incl(world_group, nsranks, subranks, &subgroup); + MPI_Group_incl(world_group, nsranks, subranks.data(), &subgroup); MPI_Comm subcomm; MPI_Comm_create(world_comm, subgroup, &subcomm); MPI_Group_free(&world_group); diff --git a/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp b/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp index c07cde0..8795a0d 100644 --- a/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp +++ b/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp @@ -354,8 +354,8 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index } // end ia6 // ia6 -- compute sizes and permutations - int idx_offset = 0; - int detailed_stats[noab][9]; + int idx_offset = 0; + std::vector> detailed_stats(noab); for(Index idx_noab = 0; idx_noab < noab; idx_noab++) { 
detailed_stats[idx_noab][0] = 0; @@ -394,7 +394,7 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index detailed_stats[h7b][7] = 0; detailed_stats[h7b][8] = 0; - int idx_new_offset = 0; + // int idx_new_offset = 0; if(!ia6_enabled[ia6 * noab + h7b]) continue; size_t dim_common = k_range[h7b]; @@ -405,55 +405,55 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index if(ref_p456_h123 == cur_p456_h123) { df_simple_d1_exec[0 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][0] = detailed_stats[h7b][0] + 1; } if(ref_p456_h123 == cur_p456_h213) { df_simple_d1_exec[1 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][1] = detailed_stats[h7b][1] + 1; } if(ref_p456_h123 == cur_p456_h231) { df_simple_d1_exec[2 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][2] = detailed_stats[h7b][2] + 1; } if(ref_p456_h123 == cur_p645_h123) { df_simple_d1_exec[3 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][3] = detailed_stats[h7b][3] + 1; } if(ref_p456_h123 == cur_p645_h213) { df_simple_d1_exec[4 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][4] = detailed_stats[h7b][4] + 1; } if(ref_p456_h123 == cur_p645_h231) { df_simple_d1_exec[5 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][5] = detailed_stats[h7b][5] + 1; } if(ref_p456_h123 == cur_p465_h123) { df_simple_d1_exec[6 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][6] = 
detailed_stats[h7b][6] + 1; } if(ref_p456_h123 == cur_p465_h213) { df_simple_d1_exec[7 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][7] = detailed_stats[h7b][7] + 1; } if(ref_p456_h123 == cur_p465_h231) { df_simple_d1_exec[8 + h7b * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[h7b][8] = detailed_stats[h7b][8] + 1; } diff --git a/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp b/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp index 7b68cd4..3a7cb15 100644 --- a/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp +++ b/exachem/cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp @@ -407,8 +407,8 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index } // end ia6 // ia6 -- compute sizes and permutations - int idx_offset = 0; - int detailed_stats[nvab][9]; + int idx_offset = 0; + std::vector> detailed_stats(nvab); for(Index idx_nvab = 0; idx_nvab < nvab; idx_nvab++) { detailed_stats[idx_nvab][0] = 0; @@ -447,7 +447,7 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index detailed_stats[p7b - noab][7] = 0; detailed_stats[p7b - noab][8] = 0; - int idx_new_offset = 0; + // int idx_new_offset = 0; if(!ia6_enabled[ia6 * nvab + p7b - noab]) continue; size_t dim_common = k_range[p7b]; @@ -458,55 +458,55 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index if(ref_p456_h123 == cur_p456_h123) { df_simple_d2_exec[0 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][0] = detailed_stats[p7b - noab][0] + 1; } if(ref_p456_h123 == cur_p456_h312) { df_simple_d2_exec[1 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][1] = detailed_stats[p7b - 
noab][1] + 1; } if(ref_p456_h123 == cur_p456_h132) { df_simple_d2_exec[2 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][2] = detailed_stats[p7b - noab][2] + 1; } if(ref_p456_h123 == cur_p546_h123) { df_simple_d2_exec[3 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][3] = detailed_stats[p7b - noab][3] + 1; } if(ref_p456_h123 == cur_p546_h312) { df_simple_d2_exec[4 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][4] = detailed_stats[p7b - noab][4] + 1; } if(ref_p456_h123 == cur_p546_h132) { df_simple_d2_exec[5 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][5] = detailed_stats[p7b - noab][5] + 1; } if(ref_p456_h123 == cur_p564_h123) { df_simple_d2_exec[6 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][6] = detailed_stats[p7b - noab][6] + 1; } if(ref_p456_h123 == cur_p564_h312) { df_simple_d2_exec[7 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][7] = detailed_stats[p7b - noab][7] + 1; } if(ref_p456_h123 == cur_p564_h132) { df_simple_d2_exec[8 + (p7b - noab) * 9] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[p7b - noab][8] = detailed_stats[p7b - noab][8] + 1; } diff --git a/exachem/cc/ccsd_t/ccsd_t_all_fused_singles.hpp b/exachem/cc/ccsd_t/ccsd_t_all_fused_singles.hpp index df73d10..cfa06fa 100644 --- a/exachem/cc/ccsd_t/ccsd_t_all_fused_singles.hpp +++ 
b/exachem/cc/ccsd_t/ccsd_t_all_fused_singles.hpp @@ -447,8 +447,8 @@ void ccsd_t_data_s1_info_only(bool is_restricted, const Index noab, const Index detailed_stats[8] = 0; // ia6 -- compute sizes and permutations - int idx_offset = 0; - int idx_new_offset = 0; + int idx_offset = 0; + // int idx_new_offset = 0; for(auto ia6 = 0; ia6 < 9; ia6++) { if(!ia6_enabled[ia6]) { continue; } @@ -474,55 +474,55 @@ void ccsd_t_data_s1_info_only(bool is_restricted, const Index noab, const Index if(ref_p456_h123 == cur_p456_h123) { df_simple_s1_exec[0] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[0] = detailed_stats[0] + 1; } if(ref_p456_h123 == cur_p456_h213) { df_simple_s1_exec[1] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[1] = detailed_stats[1] + 1; } if(ref_p456_h123 == cur_p456_h231) { df_simple_s1_exec[2] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[2] = detailed_stats[2] + 1; } if(ref_p456_h123 == cur_p546_h123) { df_simple_s1_exec[3] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[3] = detailed_stats[3] + 1; } if(ref_p456_h123 == cur_p546_h213) { df_simple_s1_exec[4] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[4] = detailed_stats[4] + 1; } if(ref_p456_h123 == cur_p546_h231) { df_simple_s1_exec[5] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[5] = detailed_stats[5] + 1; } if(ref_p456_h123 == cur_p564_h123) { df_simple_s1_exec[6] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[6] = detailed_stats[6] + 1; } if(ref_p456_h123 == cur_p564_h213) { 
df_simple_s1_exec[7] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[7] = detailed_stats[7] + 1; } if(ref_p456_h123 == cur_p564_h231) { df_simple_s1_exec[8] = idx_offset; *num_enabled_kernels = *num_enabled_kernels + 1; - idx_new_offset++; + // idx_new_offset++; detailed_stats[8] = detailed_stats[8] + 1; } // diff --git a/exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp b/exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp index d93d445..3ddf2fb 100644 --- a/exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp +++ b/exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp @@ -222,7 +222,7 @@ std::tuple ccsd_t_fused_driver_new( static_cast(memDevPool.allocate(sizeof(T) * std::pow(max_num_blocks, 6) * 2)); #endif - int num_task = 0; + // int num_task = 0; // if(!seq_h3b) { // if(rank == 0) { // std::cout << "456123 parallel 6d loop variant" << std::endl << std::endl; @@ -354,7 +354,7 @@ std::tuple ccsd_t_fused_driver_new( else if((t_h1b == t_h2b) || (t_h2b == t_h3b)) { factor /= 2.0; } // - num_task++; + // num_task++; #if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP) ccsd_t_fully_fused_none_df_none_task( diff --git a/exachem/cc/gfcc/gfccsd_driver.cpp b/exachem/cc/gfcc/gfccsd_driver.cpp index 26c6f58..2a95e48 100644 --- a/exachem/cc/gfcc/gfccsd_driver.cpp +++ b/exachem/cc/gfcc/gfccsd_driver.cpp @@ -2534,16 +2534,16 @@ void gfccsd_driver(ExecutionContext& ec, ChemEnv& chem_env) { int nsranks = sys_data.nbf / 15; if(nsranks < 1) nsranks = 1; - int ga_cnn = GA_Cluster_nnodes(); + int ga_cnn = ec.nnodes(); if(nsranks > ga_cnn) nsranks = ga_cnn; - nsranks = nsranks * GA_Cluster_nprocs(0); - int subranks[nsranks]; + nsranks = nsranks * ec.ppn(); + std::vector subranks(nsranks); for(int i = 0; i < nsranks; i++) subranks[i] = i; auto world_comm = ec.pg().comm(); MPI_Group world_group; MPI_Comm_group(world_comm, &world_group); MPI_Group subgroup; - MPI_Group_incl(world_group, nsranks, subranks, &subgroup); + 
MPI_Group_incl(world_group, nsranks, subranks.data(), &subgroup); MPI_Comm subcomm; MPI_Comm_create(world_comm, subgroup, &subcomm); MPI_Group_free(&subgroup); diff --git a/exachem/cc/scripts/ccsd_advisor.py b/exachem/cc/scripts/ccsd_advisor.py index 9162835..aae4ee1 100644 --- a/exachem/cc/scripts/ccsd_advisor.py +++ b/exachem/cc/scripts/ccsd_advisor.py @@ -227,9 +227,9 @@ def parseargs(argv=None): VabOab = v_alpha*o_beta*v_beta*o_alpha -ts_guess=50 +ts_guess=40 ts_max=ts_guess -tilesizes = list(range(ts_guess, 301, 10)) +tilesizes = list(range(ts_guess, 181, 10)) def get_ts_recommendation(tilesizes,nranks): ts_guess_ = tilesizes[0] diff --git a/exachem/cholesky/cholesky_2e.cpp b/exachem/cholesky/cholesky_2e.cpp index d61099a..6b1793c 100644 --- a/exachem/cholesky/cholesky_2e.cpp +++ b/exachem/cholesky/cholesky_2e.cpp @@ -11,6 +11,7 @@ using namespace exachem::scf; bool cd_debug = false; #define CD_USE_PGAS_API +#define CD_THROTTLE template auto cd_tensor_zero(Tensor& tens) { @@ -21,7 +22,8 @@ auto cd_tensor_zero(Tensor& tens) { namespace exachem::cholesky_2e { -int get_ts_recommendation(int nranks, ChemEnv& chem_env) { +int get_ts_recommendation(ExecutionContext& ec, ChemEnv& chem_env) { + int nranks = ec.nnodes() * ec.ppn(); int ts_guess = chem_env.ioptions.ccsd_options.tilesize; SystemData& sys_data = chem_env.sys_data; @@ -30,8 +32,20 @@ int get_ts_recommendation(int nranks, ChemEnv& chem_env) { const auto o_beta = sys_data.n_occ_beta; const auto v_beta = sys_data.n_vir_beta; + int max_ts = 180; // gpu_mem=64 + tamm::meminfo minfo = ec.mem_info(); + if(ec.has_gpu()) { + const auto gpu_mem = minfo.gpu_mem_per_device; + if(gpu_mem <= 8) max_ts = 100; + else if(gpu_mem <= 12) max_ts = 120; + else if(gpu_mem <= 16) max_ts = 130; + else if(gpu_mem <= 24) max_ts = 145; + else if(gpu_mem <= 32) max_ts = 155; + else if(gpu_mem <= 48) max_ts = 170; + } + std::vector tilesizes; - for(int i = ts_guess; i <= 300; i += 10) { tilesizes.push_back(i); } + for(int i = 
ts_guess; i <= max_ts; i += 5) { tilesizes.push_back(i); } int ts_guess_ = tilesizes[0]; int ts_max_ = tilesizes[0]; @@ -68,7 +82,7 @@ std::tuple setup_mo_red(ExecutionContext& ec, ChemEn if(!ccsd_options.force_tilesize && ec.has_gpu()) { tce_tile = static_cast(sys_data.nbf / 10); if(tce_tile < 50) tce_tile = 50; // 50 is the default tilesize - else if(tce_tile > 100) tce_tile = 100; // 100 is the max tilesize + else if(tce_tile > 140) tce_tile = 140; // 140 is the max tilesize if(rank == 0) std::cout << std::endl << "Resetting tilesize for the MO space to: " << tce_tile << std::endl; @@ -113,7 +127,7 @@ std::tuple setupMOIS(ExecutionContext& ec, ChemEnv& bool balance_tiles = ccsd_options.balance_tiles; if(!triples) { if(!ccsd_options.force_tilesize && ec.has_gpu()) { - tce_tile = get_ts_recommendation(ec.nnodes() * ec.ppn(), chem_env); + tce_tile = get_ts_recommendation(ec, chem_env); if(rank == 0) std::cout << std::endl << "Resetting tilesize for the MSO space to: " << tce_tile << std::endl; @@ -346,8 +360,6 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd std::vector AO_tiles = scf_vars.AO_tiles; // TiledIndexSpace tAOt{tAO.index_space(), AO_tiles}; - ExecutionContext ec_dense{ec.pg(), DistributionKind::dense, MemoryManagerKind::ga}; - auto rank = ec_dense.pg().rank().value(); TAMM_GA_SIZE N = tMO("all").max_num_indices(); IndexSpace CI{range(0, max_cvecs)}; @@ -357,12 +369,18 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd lcao_eig.setZero(); tamm_to_eigen_tensor(lcao, lcao_eig); + auto rank = ec.pg().rank().value(); + // Cholesky decomposition if(rank == 0) { cout << endl << " Begin Cholesky Decomposition" << endl; cout << std::string(45, '-') << endl; } + auto cd_t1 = std::chrono::high_resolution_clock::now(); + auto cd_t2 = cd_t1; + double cd_time = 0; + const auto nbf = nao; int64_t count = 0; // Initialize cholesky vector count @@ -373,9 +391,31 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, 
TiledInd const auto diag_ao_file = files_prefix + ".diag_ao"; const auto cv_count_file = files_prefix + ".cholcount"; - std::vector lo_x(4, -1); // The lower limits of blocks - std::vector hi_x(4, -2); // The upper limits of blocks - std::vector ld_x(4); // The leading dims of blocks + int64_t cd_nranks = /* std::abs(std::log10(diagtol)) */ nbf / 2; // max cores + auto nnodes = ec.nnodes(); + auto ppn = ec.ppn(); + int cd_nnodes = cd_nranks / ppn; + if(cd_nranks % ppn > 0 || cd_nnodes == 0) cd_nnodes++; + if(cd_nnodes > nnodes) cd_nnodes = nnodes; + cd_nranks = cd_nnodes * ppn; + if(rank == 0) { + cout << "Total # of mpi ranks used for Cholesky decomposition: " << cd_nranks << endl + << " --> Number of nodes, mpi ranks per node: " << cd_nnodes << ", " << ppn << endl; + } + +#if defined(CD_THROTTLE) + std::vector ranks(cd_nranks); + for(int i = 0; i < cd_nranks; i++) ranks[i] = i; + MPI_Group wgroup; + MPI_Group cdgroup; + MPI_Comm cd_comm; + auto gcomm = ec.pg().comm(); + MPI_Comm_group(gcomm, &wgroup); + MPI_Group_incl(wgroup, cd_nranks, ranks.data(), &cdgroup); + MPI_Comm_create(gcomm, cdgroup, &cd_comm); + MPI_Group_free(&wgroup); + MPI_Group_free(&cdgroup); +#endif Tensor g_d_tamm{tAO, tAO}; Tensor g_r_tamm{tAO, tAO}; @@ -394,264 +434,282 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd g_d_tamm.set_dense(); g_chol_tamm.set_dense(); - Tensor::allocate(&ec_dense, g_d_tamm, g_r_tamm, g_chol_tamm); + bool cd_throttle = true; // SCF_THROTTLE_RESOURCES flag + if(rank >= cd_nranks) cd_throttle = false; -#if !defined(USE_UPCXX) - cd_tensor_zero(g_d_tamm); - cd_tensor_zero(g_r_tamm); - cd_tensor_zero(g_chol_tamm); +#if defined(CD_THROTTLE) + if(cd_throttle) { + EXPECTS(cd_comm != MPI_COMM_NULL); + ProcGroup pg_cd = ProcGroup::create_coll(cd_comm); + ExecutionContext ec_cd{pg_cd, DistributionKind::nw, MemoryManagerKind::ga}; +#else + ExecutionContext& ec_cd = ec; +#endif - auto write_chol_vectors = [&]() { - write_to_disk(g_d_tamm, diag_ao_file); 
- write_to_disk(g_chol_tamm, chol_ao_file); - if(rank == 0) { - std::ofstream out(cv_count_file, std::ios::out); - if(!out) cerr << "Error opening file " << cv_count_file << endl; - out << count << std::endl; - out.close(); - if(rank == 0) - cout << endl << "- Number of cholesky vectors written to disk = " << count << endl; - } - }; + ExecutionContext ec_dense{ec_cd.pg(), DistributionKind::dense, MemoryManagerKind::ga}; + + std::vector lo_x(4, -1); // The lower limits of blocks + std::vector hi_x(4, -2); // The upper limits of blocks + std::vector ld_x(4); // The leading dims of blocks + + Tensor::allocate(&ec_dense, g_d_tamm, g_r_tamm, g_chol_tamm); - const int g_chol = g_chol_tamm.ga_handle(); - const int g_d = g_d_tamm.ga_handle(); - const int g_r = g_r_tamm.ga_handle(); +#if !defined(USE_UPCXX) + cd_tensor_zero(g_d_tamm); + cd_tensor_zero(g_r_tamm); + cd_tensor_zero(g_chol_tamm); + + auto write_chol_vectors = [&]() { + write_to_disk(g_d_tamm, diag_ao_file); + write_to_disk(g_chol_tamm, chol_ao_file); + if(rank == 0) { + std::ofstream out(cv_count_file, std::ios::out); + if(!out) cerr << "Error opening file " << cv_count_file << endl; + out << count << std::endl; + out.close(); + if(rank == 0) + cout << endl << "- Number of cholesky vectors written to disk = " << count << endl; + } + }; + + const int g_chol = g_chol_tamm.ga_handle(); + const int g_d = g_d_tamm.ga_handle(); + const int g_r = g_r_tamm.ga_handle(); #if defined(CD_USE_PGAS_API) - std::vector lo_b(g_chol_tamm.num_modes(), -1); // The lower limits of blocks of B - std::vector hi_b(g_chol_tamm.num_modes(), -2); // The upper limits of blocks of B - std::vector ld_b(g_chol_tamm.num_modes()); // The leading dims of blocks of B - - std::vector lo_r(g_r_tamm.num_modes(), -1); // The lower limits of blocks of R - std::vector hi_r(g_r_tamm.num_modes(), -2); // The upper limits of blocks of R - std::vector ld_r(g_r_tamm.num_modes()); // The leading dims of blocks of R - - std::vector lo_d(g_d_tamm.num_modes(), 
-1); // The lower limits of blocks of D - std::vector hi_d(g_d_tamm.num_modes(), -2); // The upper limits of blocks of D - std::vector ld_d(g_d_tamm.num_modes()); // The leading dims of blocks of D - - // Distribution Check - NGA_Distribution64(g_chol, rank, lo_b.data(), hi_b.data()); - NGA_Distribution64(g_d, rank, lo_d.data(), hi_d.data()); - NGA_Distribution64(g_r, rank, lo_r.data(), hi_r.data()); - - bool has_gc_data = (lo_b[0] >= 0 && hi_b[0] >= 0); - bool has_gd_data = (lo_d[0] >= 0 && hi_d[0] >= 0); - bool has_gr_data = (lo_r[0] >= 0 && hi_r[0] >= 0); + std::vector lo_b(g_chol_tamm.num_modes(), -1); // The lower limits of blocks of B + std::vector hi_b(g_chol_tamm.num_modes(), -2); // The upper limits of blocks of B + std::vector ld_b(g_chol_tamm.num_modes()); // The leading dims of blocks of B + + std::vector lo_r(g_r_tamm.num_modes(), -1); // The lower limits of blocks of R + std::vector hi_r(g_r_tamm.num_modes(), -2); // The upper limits of blocks of R + std::vector ld_r(g_r_tamm.num_modes()); // The leading dims of blocks of R + + std::vector lo_d(g_d_tamm.num_modes(), -1); // The lower limits of blocks of D + std::vector hi_d(g_d_tamm.num_modes(), -2); // The upper limits of blocks of D + std::vector ld_d(g_d_tamm.num_modes()); // The leading dims of blocks of D + + // Distribution Check + NGA_Distribution64(g_chol, rank, lo_b.data(), hi_b.data()); + NGA_Distribution64(g_d, rank, lo_d.data(), hi_d.data()); + NGA_Distribution64(g_r, rank, lo_r.data(), hi_r.data()); + + bool has_gc_data = (lo_b[0] >= 0 && hi_b[0] >= 0); + bool has_gd_data = (lo_d[0] >= 0 && hi_d[0] >= 0); + bool has_gr_data = (lo_r[0] >= 0 && hi_r[0] >= 0); #endif #endif - ec_dense.pg().barrier(); + ec_dense.pg().barrier(); - auto cd_t1 = std::chrono::high_resolution_clock::now(); - /* Compute the diagonal - g_d_tamm stores the diagonal integrals, i.e. 
(uv|uv)'s - ScrCol temporarily stores all (uv|rs)'s with fixed r and s - */ - Engine engine(Operator::coulomb, max_nprim(shells), max_l(shells), 0); - const double engine_precision = chem_env.ioptions.scf_options.tol_int; + cd_t1 = std::chrono::high_resolution_clock::now(); + /* Compute the diagonal + g_d_tamm stores the diagonal integrals, i.e. (uv|uv)'s + ScrCol temporarily stores all (uv|rs)'s with fixed r and s + */ + Engine engine(Operator::coulomb, max_nprim(shells), max_l(shells), 0); + const double engine_precision = chem_env.ioptions.scf_options.tol_int; - // Compute diagonal without primitive screening - engine.set_precision(0.0); - const auto& buf = engine.results(); + // Compute diagonal without primitive screening + engine.set_precision(0.0); + const auto& buf = engine.results(); - bool cd_restart = write_cv.first && fs::exists(diag_ao_file) && fs::exists(chol_ao_file) && - fs::exists(cv_count_file); + bool cd_restart = write_cv.first && fs::exists(diag_ao_file) && fs::exists(chol_ao_file) && + fs::exists(cv_count_file); - auto compute_diagonals = [&](const IndexVector& blockid) { - auto bi0 = blockid[0]; - auto bi1 = blockid[1]; + auto compute_diagonals = [&](const IndexVector& blockid) { + auto bi0 = blockid[0]; + auto bi1 = blockid[1]; - const TAMM_SIZE size = g_d_tamm.block_size(blockid); - auto block_dims = g_d_tamm.block_dims(blockid); - std::vector dbuf(size); + const TAMM_SIZE size = g_d_tamm.block_size(blockid); + auto block_dims = g_d_tamm.block_dims(blockid); + std::vector dbuf(size); - auto bd1 = block_dims[1]; + auto bd1 = block_dims[1]; - size_t s1range_start = 0; - auto s1range_end = shell_tile_map[bi0]; - if(bi0 > 0) s1range_start = shell_tile_map[bi0 - 1] + 1; + size_t s1range_start = 0; + auto s1range_end = shell_tile_map[bi0]; + if(bi0 > 0) s1range_start = shell_tile_map[bi0 - 1] + 1; - for(auto s1 = s1range_start; s1 <= s1range_end; ++s1) { - auto n1 = shells[s1].size(); + for(auto s1 = s1range_start; s1 <= s1range_end; ++s1) { + 
auto n1 = shells[s1].size(); - size_t s2range_start = 0; - auto s2range_end = shell_tile_map[bi1]; - if(bi1 > 0) s2range_start = shell_tile_map[bi1 - 1] + 1; + size_t s2range_start = 0; + auto s2range_end = shell_tile_map[bi1]; + if(bi1 > 0) s2range_start = shell_tile_map[bi1 - 1] + 1; - for(size_t s2 = s2range_start; s2 <= s2range_end; ++s2) { - if(s2 > s1) { - auto s2spl = scf_vars.obs_shellpair_list[s2]; - if(std::find(s2spl.begin(), s2spl.end(), s1) == s2spl.end()) continue; - } - else { - auto s2spl = scf_vars.obs_shellpair_list[s1]; - if(std::find(s2spl.begin(), s2spl.end(), s2) == s2spl.end()) continue; - } + for(size_t s2 = s2range_start; s2 <= s2range_end; ++s2) { + if(s2 > s1) { + auto s2spl = scf_vars.obs_shellpair_list[s2]; + if(std::find(s2spl.begin(), s2spl.end(), s1) == s2spl.end()) continue; + } + else { + auto s2spl = scf_vars.obs_shellpair_list[s1]; + if(std::find(s2spl.begin(), s2spl.end(), s2) == s2spl.end()) continue; + } - auto n2 = shells[s2].size(); + auto n2 = shells[s2].size(); - // compute shell pair; return is the pointer to the buffer - engine.compute(shells[s1], shells[s2], shells[s1], shells[s2]); - const auto* buf_1212 = buf[0]; - if(buf_1212 == nullptr) continue; + // compute shell pair; return is the pointer to the buffer + engine.compute(shells[s1], shells[s2], shells[s1], shells[s2]); + const auto* buf_1212 = buf[0]; + if(buf_1212 == nullptr) continue; - auto curshelloffset_i = 0U; - auto curshelloffset_j = 0U; - for(auto x = s1range_start; x < s1; x++) curshelloffset_i += AO_tiles[x]; - for(auto x = s2range_start; x < s2; x++) curshelloffset_j += AO_tiles[x]; + auto curshelloffset_i = 0U; + auto curshelloffset_j = 0U; + for(auto x = s1range_start; x < s1; x++) curshelloffset_i += AO_tiles[x]; + for(auto x = s2range_start; x < s2; x++) curshelloffset_j += AO_tiles[x]; - auto dimi = curshelloffset_i + AO_tiles[s1]; - auto dimj = curshelloffset_j + AO_tiles[s2]; + auto dimi = curshelloffset_i + AO_tiles[s1]; + auto dimj = 
curshelloffset_j + AO_tiles[s2]; - for(size_t i = curshelloffset_i; i < dimi; i++) { - for(size_t j = curshelloffset_j; j < dimj; j++) { - auto f1 = i - curshelloffset_i; - auto f2 = j - curshelloffset_j; - auto f1212 = f1 * n2 * n1 * n2 + f2 * n1 * n2 + f1 * n2 + f2; - dbuf[i * bd1 + j] = buf_1212[f1212]; + for(size_t i = curshelloffset_i; i < dimi; i++) { + for(size_t j = curshelloffset_j; j < dimj; j++) { + auto f1 = i - curshelloffset_i; + auto f2 = j - curshelloffset_j; + auto f1212 = f1 * n2 * n1 * n2 + f2 * n1 * n2 + f1 * n2 + f2; + dbuf[i * bd1 + j] = buf_1212[f1212]; + } } } } + g_d_tamm.put(blockid, dbuf); + }; + // for(auto blockid: g_d_tamm.loop_nest()) { + // if(g_d_tamm.is_local_block(blockid)) { + // compute_diagonals(blockid); + // } + // } + if(!cd_restart) block_for(ec_dense, g_d_tamm(), compute_diagonals); + + cd_t2 = std::chrono::high_resolution_clock::now(); + cd_time = std::chrono::duration_cast>((cd_t2 - cd_t1)).count(); + if(rank == 0 && !cd_restart) { + std::cout << endl + << "- Time for computing the diagonal: " << std::fixed << std::setprecision(2) + << cd_time << " secs" << endl; } - g_d_tamm.put(blockid, dbuf); - }; - // for(auto blockid: g_d_tamm.loop_nest()) { - // if(g_d_tamm.is_local_block(blockid)) { - // compute_diagonals(blockid); - // } - // } - if(!cd_restart) block_for(ec_dense, g_d_tamm(), compute_diagonals); - - auto cd_t2 = std::chrono::high_resolution_clock::now(); - auto cd_time = std::chrono::duration_cast>((cd_t2 - cd_t1)).count(); - if(rank == 0 && !cd_restart) { - std::cout << endl - << "- Time for computing the diagonal: " << std::fixed << std::setprecision(2) - << cd_time << " secs" << endl; - } #if !defined(USE_UPCXX) - if(cd_restart) { - cd_t1 = std::chrono::high_resolution_clock::now(); + if(cd_restart) { + cd_t1 = std::chrono::high_resolution_clock::now(); - read_from_disk(g_d_tamm, diag_ao_file); - read_from_disk(g_chol_tamm, chol_ao_file); + read_from_disk(g_d_tamm, diag_ao_file); + read_from_disk(g_chol_tamm, 
chol_ao_file); - std::ifstream in(cv_count_file, std::ios::in); - int rstatus = 0; - if(in.is_open()) rstatus = 1; - if(rstatus == 1) in >> count; - else tamm_terminate("Error reading " + cv_count_file); + std::ifstream in(cv_count_file, std::ios::in); + int rstatus = 0; + if(in.is_open()) rstatus = 1; + if(rstatus == 1) in >> count; + else tamm_terminate("Error reading " + cv_count_file); - if(rank == 0) - cout << endl << "- [CD restart] Number of cholesky vectors read = " << count << endl; + if(rank == 0) + cout << endl << "- [CD restart] Number of cholesky vectors read = " << count << endl; - cd_t2 = std::chrono::high_resolution_clock::now(); - cd_time = std::chrono::duration_cast>((cd_t2 - cd_t1)).count(); - if(rank == 0) { - std::cout << "- [CD restart] Time for reading the diagonal and cholesky vectors: " - << std::fixed << std::setprecision(2) << cd_time << " secs" << endl; + cd_t2 = std::chrono::high_resolution_clock::now(); + cd_time = std::chrono::duration_cast>((cd_t2 - cd_t1)).count(); + if(rank == 0) { + std::cout << "- [CD restart] Time for reading the diagonal and cholesky vectors: " + << std::fixed << std::setprecision(2) << cd_time << " secs" << endl; + } } - } #endif - auto cd_t3 = std::chrono::high_resolution_clock::now(); - - auto [val_d0, blkid, eoff] = tamm::max_element(g_d_tamm); - auto blkoff = g_d_tamm.block_offsets(blkid); - std::vector indx_d0(g_d_tamm.num_modes()); - indx_d0[0] = (int64_t) blkoff[0] + (int64_t) eoff[0]; - indx_d0[1] = (int64_t) blkoff[1] + (int64_t) eoff[1]; - - // Reset Engine precision - const double schwarz_tol = chem_env.ioptions.scf_options.tol_sch; - engine.set_precision(engine_precision); - - while(val_d0 > diagtol && count < max_cvecs) { - auto bfu = indx_d0[0]; - auto bfv = indx_d0[1]; - auto s1 = bf2shell[bfu]; - auto n1 = shells[s1].size(); - auto s2 = bf2shell[bfv]; - auto n2 = shells[s2].size(); - auto n12 = n1 * n2; - auto f1 = bfu - shell2bf[s1]; - auto f2 = bfv - shell2bf[s2]; - auto ind12 = f1 * n2 + f2; 
- auto schwarz_12 = SchwarzK(s1, s2); + auto cd_t3 = std::chrono::high_resolution_clock::now(); + + auto [val_d0, blkid, eoff] = tamm::max_element(g_d_tamm); + auto blkoff = g_d_tamm.block_offsets(blkid); + std::vector indx_d0(g_d_tamm.num_modes()); + indx_d0[0] = (int64_t) blkoff[0] + (int64_t) eoff[0]; + indx_d0[1] = (int64_t) blkoff[1] + (int64_t) eoff[1]; + + // Reset Engine precision + const double schwarz_tol = chem_env.ioptions.scf_options.tol_sch; + engine.set_precision(engine_precision); + + while(val_d0 > diagtol && count < max_cvecs) { + auto bfu = indx_d0[0]; + auto bfv = indx_d0[1]; + auto s1 = bf2shell[bfu]; + auto n1 = shells[s1].size(); + auto s2 = bf2shell[bfv]; + auto n2 = shells[s2].size(); + auto n12 = n1 * n2; + auto f1 = bfu - shell2bf[s1]; + auto f2 = bfv - shell2bf[s2]; + auto ind12 = f1 * n2 + f2; + auto schwarz_12 = SchwarzK(s1, s2); #if !defined(USE_UPCXX) - cd_tensor_zero(g_r_tamm); + cd_tensor_zero(g_r_tamm); #endif #if !defined(CD_USE_PGAS_API) - auto compute_eri = [&](const IndexVector& blockid) { - auto bi0 = blockid[0]; - auto bi1 = blockid[1]; + auto compute_eri = [&](const IndexVector& blockid) { + auto bi0 = blockid[0]; + auto bi1 = blockid[1]; - const TAMM_SIZE size = g_r_tamm.block_size(blockid); - auto block_dims = g_r_tamm.block_dims(blockid); - std::vector dbuf(size); - - auto bd1 = block_dims[1]; - - auto s3range_start = 0l; - auto s3range_end = shell_tile_map[bi0]; - if(bi0 > 0) s3range_start = shell_tile_map[bi0 - 1] + 1; + const TAMM_SIZE size = g_r_tamm.block_size(blockid); + auto block_dims = g_r_tamm.block_dims(blockid); + std::vector dbuf(size); - for(Index s3 = s3range_start; s3 <= s3range_end; ++s3) { - auto n3 = shells[s3].size(); + auto bd1 = block_dims[1]; - auto s4range_start = 0l; - auto s4range_end = shell_tile_map[bi1]; - if(bi1 > 0) s4range_start = shell_tile_map[bi1 - 1] + 1; - - for(Index s4 = s4range_start; s4 <= s4range_end; ++s4) { - if(s4 > s3) { - auto s2spl = obs_shellpair_list[s4]; - 
if(std::find(s2spl.begin(), s2spl.end(), s3) == s2spl.end()) continue; - } - else { - auto s2spl = obs_shellpair_list[s3]; - if(std::find(s2spl.begin(), s2spl.end(), s4) == s2spl.end()) continue; - } + auto s3range_start = 0l; + auto s3range_end = shell_tile_map[bi0]; + if(bi0 > 0) s3range_start = shell_tile_map[bi0 - 1] + 1; - auto n4 = shells[s4].size(); - if(schwarz_12 * SchwarzK(s3, s4) < schwarz_tol) continue; + for(Index s3 = s3range_start; s3 <= s3range_end; ++s3) { + auto n3 = shells[s3].size(); - // compute shell pair; return is the pointer to the buffer - engine.compute(shells[s3], shells[s4], shells[s1], shells[s2]); - const auto* buf_3412 = buf[0]; - if(buf_3412 == nullptr) continue; // if all integrals screened out, skip to next quartet - - auto curshelloffset_i = 0U; - auto curshelloffset_j = 0U; - for(auto x = s3range_start; x < s3; x++) curshelloffset_i += AO_tiles[x]; - for(auto x = s4range_start; x < s4; x++) curshelloffset_j += AO_tiles[x]; + auto s4range_start = 0l; + auto s4range_end = shell_tile_map[bi1]; + if(bi1 > 0) s4range_start = shell_tile_map[bi1 - 1] + 1; - auto dimi = curshelloffset_i + AO_tiles[s3]; - auto dimj = curshelloffset_j + AO_tiles[s4]; + for(Index s4 = s4range_start; s4 <= s4range_end; ++s4) { + if(s4 > s3) { + auto s2spl = obs_shellpair_list[s4]; + if(std::find(s2spl.begin(), s2spl.end(), s3) == s2spl.end()) continue; + } + else { + auto s2spl = obs_shellpair_list[s3]; + if(std::find(s2spl.begin(), s2spl.end(), s4) == s2spl.end()) continue; + } - for(size_t i = curshelloffset_i; i < dimi; i++) { - for(size_t j = curshelloffset_j; j < dimj; j++) { - auto f3 = i - curshelloffset_i; - auto f4 = j - curshelloffset_j; - auto f3412 = f3 * n4 * n12 + f4 * n12 + ind12; - auto x = buf_3412[f3412]; - dbuf[i * bd1 + j] = x; + auto n4 = shells[s4].size(); + if(schwarz_12 * SchwarzK(s3, s4) < schwarz_tol) continue; + + // compute shell pair; return is the pointer to the buffer + engine.compute(shells[s3], shells[s4], shells[s1], 
shells[s2]); + const auto* buf_3412 = buf[0]; + if(buf_3412 == nullptr) continue; // if all integrals screened out, skip to next quartet + + auto curshelloffset_i = 0U; + auto curshelloffset_j = 0U; + for(auto x = s3range_start; x < s3; x++) curshelloffset_i += AO_tiles[x]; + for(auto x = s4range_start; x < s4; x++) curshelloffset_j += AO_tiles[x]; + + auto dimi = curshelloffset_i + AO_tiles[s3]; + auto dimj = curshelloffset_j + AO_tiles[s4]; + + for(size_t i = curshelloffset_i; i < dimi; i++) { + for(size_t j = curshelloffset_j; j < dimj; j++) { + auto f3 = i - curshelloffset_i; + auto f4 = j - curshelloffset_j; + auto f3412 = f3 * n4 * n12 + f4 * n12 + ind12; + auto x = buf_3412[f3412]; + dbuf[i * bd1 + j] = x; + } } } } - } - g_r_tamm.put(blockid, dbuf); - }; - // for(auto blockid: g_r_tamm.loop_nest()) { - // if(g_r_tamm.is_local_block(blockid)) - // { compute_eri(blockid); } - // } - block_for(ec_dense, g_r_tamm(), compute_eri); + g_r_tamm.put(blockid, dbuf); + }; + // for(auto blockid: g_r_tamm.loop_nest()) { + // if(g_r_tamm.is_local_block(blockid)) + // { compute_eri(blockid); } + // } + block_for(ec_dense, g_r_tamm(), compute_eri); #else for(size_t s3 = 0; s3 != shells.size(); ++s3) { @@ -699,16 +757,16 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd #endif #ifndef USE_UPCXX - lo_x[0] = indx_d0[0]; - lo_x[1] = indx_d0[1]; - lo_x[2] = 0; - lo_x[3] = 0; - hi_x[0] = indx_d0[0]; - hi_x[1] = indx_d0[1]; - hi_x[2] = count; - hi_x[3] = 0; - ld_x[0] = 1; - ld_x[1] = hi_x[2] + 1; + lo_x[0] = indx_d0[0]; + lo_x[1] = indx_d0[1]; + lo_x[2] = 0; + lo_x[3] = 0; + hi_x[0] = indx_d0[0]; + hi_x[1] = indx_d0[1]; + hi_x[2] = count; + hi_x[3] = 0; + ld_x[0] = 1; + ld_x[1] = hi_x[2] + 1; #else lo_x[0] = 0; lo_x[1] = indx_d0[0]; @@ -720,83 +778,83 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd hi_x[3] = count; #endif - std::vector k_row(max_cvecs); + std::vector k_row(max_cvecs); #if !defined(CD_USE_PGAS_API) - auto 
update_diagonals = [&](const IndexVector& blockid) { - auto bi0 = blockid[0]; - auto bi1 = blockid[1]; - - IndexVector rdblockid = {bi0, bi1}; - const TAMM_SIZE size = g_chol_tamm.block_size(blockid); - auto block_dims = g_chol_tamm.block_dims(blockid); - const TAMM_SIZE rdsize = g_r_tamm.block_size(rdblockid); - std::vector cbuf(size); - std::vector rbuf(rdsize); - std::vector dbuf(rdsize); - - g_r_tamm.get(rdblockid, rbuf); - g_d_tamm.get(rdblockid, dbuf); - g_chol_tamm.get(blockid, cbuf); - - auto bd0 = block_dims[1]; - auto bd1 = block_dims[2]; - - auto s3range_start = 0l; - auto s3range_end = shell_tile_map[bi0]; - if(bi0 > 0) s3range_start = shell_tile_map[bi0 - 1] + 1; - - for(Index s3 = s3range_start; s3 <= s3range_end; ++s3) { - auto n3 = shells[s3].size(); + auto update_diagonals = [&](const IndexVector& blockid) { + auto bi0 = blockid[0]; + auto bi1 = blockid[1]; + + IndexVector rdblockid = {bi0, bi1}; + const TAMM_SIZE size = g_chol_tamm.block_size(blockid); + auto block_dims = g_chol_tamm.block_dims(blockid); + const TAMM_SIZE rdsize = g_r_tamm.block_size(rdblockid); + std::vector cbuf(size); + std::vector rbuf(rdsize); + std::vector dbuf(rdsize); + + g_r_tamm.get(rdblockid, rbuf); + g_d_tamm.get(rdblockid, dbuf); + g_chol_tamm.get(blockid, cbuf); + + auto bd0 = block_dims[1]; + auto bd1 = block_dims[2]; + + auto s3range_start = 0l; + auto s3range_end = shell_tile_map[bi0]; + if(bi0 > 0) s3range_start = shell_tile_map[bi0 - 1] + 1; + + for(Index s3 = s3range_start; s3 <= s3range_end; ++s3) { + auto n3 = shells[s3].size(); + + auto s4range_start = 0l; + auto s4range_end = shell_tile_map[bi1]; + if(bi1 > 0) s4range_start = shell_tile_map[bi1 - 1] + 1; + + for(Index s4 = s4range_start; s4 <= s4range_end; ++s4) { + if(s4 > s3) { + auto s2spl = obs_shellpair_list[s4]; + if(std::find(s2spl.begin(), s2spl.end(), s3) == s2spl.end()) continue; + } + else { + auto s2spl = obs_shellpair_list[s3]; + if(std::find(s2spl.begin(), s2spl.end(), s4) == s2spl.end()) 
continue; + } - auto s4range_start = 0l; - auto s4range_end = shell_tile_map[bi1]; - if(bi1 > 0) s4range_start = shell_tile_map[bi1 - 1] + 1; + auto n4 = shells[s4].size(); - for(Index s4 = s4range_start; s4 <= s4range_end; ++s4) { - if(s4 > s3) { - auto s2spl = obs_shellpair_list[s4]; - if(std::find(s2spl.begin(), s2spl.end(), s3) == s2spl.end()) continue; - } - else { - auto s2spl = obs_shellpair_list[s3]; - if(std::find(s2spl.begin(), s2spl.end(), s4) == s2spl.end()) continue; - } + auto curshelloffset_i = 0U; + auto curshelloffset_j = 0U; + for(auto x = s3range_start; x < s3; x++) curshelloffset_i += AO_tiles[x]; + for(auto x = s4range_start; x < s4; x++) curshelloffset_j += AO_tiles[x]; - auto n4 = shells[s4].size(); + auto dimi = curshelloffset_i + AO_tiles[s3]; + auto dimj = curshelloffset_j + AO_tiles[s4]; - auto curshelloffset_i = 0U; - auto curshelloffset_j = 0U; - for(auto x = s3range_start; x < s3; x++) curshelloffset_i += AO_tiles[x]; - for(auto x = s4range_start; x < s4; x++) curshelloffset_j += AO_tiles[x]; - - auto dimi = curshelloffset_i + AO_tiles[s3]; - auto dimj = curshelloffset_j + AO_tiles[s4]; - - for(size_t i = curshelloffset_i; i < dimi; i++) { - for(size_t j = curshelloffset_j; j < dimj; j++) { - for(decltype(count) icount = 0; icount < count; icount++) { - rbuf[i * bd0 + j] -= cbuf[icount + j * bd1 + i * bd0 * bd1] * k_row[icount]; + for(size_t i = curshelloffset_i; i < dimi; i++) { + for(size_t j = curshelloffset_j; j < dimj; j++) { + for(decltype(count) icount = 0; icount < count; icount++) { + rbuf[i * bd0 + j] -= cbuf[icount + j * bd1 + i * bd0 * bd1] * k_row[icount]; + } + auto vtmp = rbuf[i * bd0 + j] / sqrt(val_d0); + cbuf[count + j * bd1 + i * bd1 * bd0] = vtmp; + dbuf[i * bd0 + j] -= vtmp * vtmp; } - auto vtmp = rbuf[i * bd0 + j] / sqrt(val_d0); - cbuf[count + j * bd1 + i * bd1 * bd0] = vtmp; - dbuf[i * bd0 + j] -= vtmp * vtmp; } } } - } - g_r_tamm.put(rdblockid, rbuf); - g_d_tamm.put(rdblockid, dbuf); - g_chol_tamm.put(blockid, 
cbuf); - }; - // for(auto blockid: g_chol_tamm.loop_nest()) { - // if(g_chol_tamm.is_local_block(blockid)) - // { update_diagonals(blockid); } - // } - block_for(ec_dense, g_chol_tamm(), update_diagonals); + g_r_tamm.put(rdblockid, rbuf); + g_d_tamm.put(rdblockid, dbuf); + g_chol_tamm.put(blockid, cbuf); + }; + // for(auto blockid: g_chol_tamm.loop_nest()) { + // if(g_chol_tamm.is_local_block(blockid)) + // { update_diagonals(blockid); } + // } + block_for(ec_dense, g_chol_tamm(), update_diagonals); - count++; + count++; #else @@ -881,34 +939,55 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd #endif #endif - std::tie(val_d0, blkid, eoff) = tamm::max_element(g_d_tamm); - blkoff = g_d_tamm.block_offsets(blkid); - indx_d0[0] = (int64_t) blkoff[0] + (int64_t) eoff[0]; - indx_d0[1] = (int64_t) blkoff[1] + (int64_t) eoff[1]; + std::tie(val_d0, blkid, eoff) = tamm::max_element(g_d_tamm); + blkoff = g_d_tamm.block_offsets(blkid); + indx_d0[0] = (int64_t) blkoff[0] + (int64_t) eoff[0]; + indx_d0[1] = (int64_t) blkoff[1] + (int64_t) eoff[1]; #if !defined(USE_UPCXX) - // Restart - if(write_cv.first && count % write_cv.second == 0 && nbf > 1000) { write_chol_vectors(); } + // Restart + if(write_cv.first && count % write_cv.second == 0 && nbf > 1000) { write_chol_vectors(); } #endif - } // while + } // while - if(rank == 0) std::cout << endl << "- Total number of cholesky vectors = " << count << std::endl; + if(rank == 0) + std::cout << endl << "- Total number of cholesky vectors = " << count << std::endl; #if !defined(USE_UPCXX) - if(write_cv.first && nbf > 1000) write_chol_vectors(); + if(write_cv.first && nbf > 1000) write_chol_vectors(); #endif - Tensor::deallocate(g_d_tamm, g_r_tamm); + write_to_disk(g_chol_tamm, chol_ao_file); + Tensor::deallocate(g_d_tamm, g_r_tamm, g_chol_tamm); - auto cd_t4 = std::chrono::high_resolution_clock::now(); - cd_time = std::chrono::duration_cast>((cd_t4 - cd_t3)).count(); - if(rank == 0) { - std::cout << endl - << "- 
Time to compute cholesky vectors: " << std::fixed << std::setprecision(2) - << cd_time << " secs" << endl - << endl; - } + auto cd_t4 = std::chrono::high_resolution_clock::now(); + cd_time = std::chrono::duration_cast>((cd_t4 - cd_t3)).count(); + if(rank == 0) { + std::cout << endl + << "- Time to compute cholesky vectors: " << std::fixed << std::setprecision(2) + << cd_time << " secs" << endl + << endl; + } + +#if defined(CD_THROTTLE) + ec_cd.flush_and_sync(); + ec_dense.flush_and_sync(); + ec_cd.pg().destroy_coll(); + } // if(cd_throttle) + + if(cd_comm != MPI_COMM_NULL) MPI_Comm_free(&cd_comm); + ExecutionContext ec_dense{ec.pg(), DistributionKind::dense, MemoryManagerKind::ga}; +#endif + + ec.pg().barrier(); + ec.pg().broadcast(&count, 0); + + Tensor g_chol_tamm1{tAO, tAO, tCI}; + g_chol_tamm1.set_dense(); + Tensor::allocate(&ec_dense, g_chol_tamm1); + read_from_disk(g_chol_tamm1, chol_ao_file); + fs::remove(chol_ao_file); update_sysdata(ec, chem_env, tMO, is_mso); @@ -941,9 +1020,10 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd std::vector sbuf(dsize); #ifdef USE_UPCXX - g_chol_tamm.get_raw(lo, hi, sbuf.data()); + g_chol_tamm1.get_raw(lo, hi, sbuf.data()); #else - int64_t ld[2] = {cd_ncast(block_dims[1]), cd_ncast(block_dims[2])}; + int64_t ld[2] = {cd_ncast(block_dims[1]), cd_ncast(block_dims[2])}; + const int g_chol = g_chol_tamm1.ga_handle(); NGA_Get64(g_chol, &lo[1], &hi[1], sbuf.data(), ld); #endif @@ -963,7 +1043,8 @@ Tensor cholesky_2e(ChemEnv& chem_env, ExecutionContext& ec, TiledInd if(rank == 0) eigen_to_tamm_tensor(lcao, lcao_new); } - Tensor::deallocate(g_chol_tamm); + Tensor::deallocate(g_chol_tamm1); + ec_dense.flush_and_sync(); Tensor CholVpr_tmp{tMO, tAO, tCIp}; Tensor CholVpr_tamm{{tMO, tMO, tCIp}, diff --git a/exachem/cholesky/cholesky_2e.hpp b/exachem/cholesky/cholesky_2e.hpp index 92901a9..400854b 100644 --- a/exachem/cholesky/cholesky_2e.hpp +++ b/exachem/cholesky/cholesky_2e.hpp @@ -24,7 +24,7 @@ using 
TAMM_GA_SIZE = int64_t; namespace exachem::cholesky_2e { -int get_ts_recommendation(int nranks, ChemEnv& chem_env); +int get_ts_recommendation(ExecutionContext& ec, ChemEnv& chem_env); std::tuple setup_mo_red(ExecutionContext& ec, ChemEnv& chem_env, bool triples = false); diff --git a/exachem/common/atom_info.hpp b/exachem/common/atom_info.hpp index b9bec2c..17492f5 100644 --- a/exachem/common/atom_info.hpp +++ b/exachem/common/atom_info.hpp @@ -16,4 +16,4 @@ class AtomInfo { int atomic_number; std::string symbol; std::vector shells; // shells corresponding to this atom -}; \ No newline at end of file +}; diff --git a/exachem/common/chemenv.cpp b/exachem/common/chemenv.cpp index 54b96c2..bc270b9 100644 --- a/exachem/common/chemenv.cpp +++ b/exachem/common/chemenv.cpp @@ -222,8 +222,7 @@ void ChemEnv::write_json_data(const std::string cmodule) { std::string l_module = cmodule; txt_utils::to_lower(l_module); - std::string files_dir = - sys_data.output_file_prefix + "_files/" + ioptions.scf_options.scf_type + "/json"; + std::string files_dir = workspace_dir + ioptions.scf_options.scf_type + "/json"; if(!fs::exists(files_dir)) fs::create_directories(files_dir); std::string files_prefix = files_dir + "/" + sys_data.output_file_prefix; std::string json_file = files_prefix + "." 
+ l_module + ".json"; diff --git a/exachem/common/cutils.cpp b/exachem/common/cutils.cpp index 751635e..4e1c5de 100644 --- a/exachem/common/cutils.cpp +++ b/exachem/common/cutils.cpp @@ -116,11 +116,10 @@ void setup_scalapack_info(tamm::ExecutionContext& ec, ChemEnv& chem_env, ProcGroupData get_spg_data(ExecutionContext& ec, const size_t N, const int node_p, const int nbf_p, const int node_inp) { ProcGroupData pgdata; - pgdata.nnodes = ec.nnodes(); - pgdata.ppn = ec.ppn(); + pgdata.ppn = ec.ppn(); const int ppn = pgdata.ppn; - const int nnodes = pgdata.nnodes; + const int nnodes = ec.nnodes(); int spg_guessranks = std::ceil((nbf_p / 100.0) * N); if(node_p > 0) spg_guessranks = std::ceil((node_p / 100.0) * nnodes); diff --git a/exachem/common/cutils.hpp b/exachem/common/cutils.hpp index 70c2fbf..65972f2 100644 --- a/exachem/common/cutils.hpp +++ b/exachem/common/cutils.hpp @@ -55,7 +55,6 @@ struct ScalapackInfo { // Contains node, ppn information used for creating a smaller process group from world group struct ProcGroupData { - int nnodes{}; // total number of nodes int spg_nnodes{}; // number of nodes in smaller process group int ppn{}; // processes per node int spg_nranks{}; // number of rank in smaller process group @@ -63,7 +62,7 @@ struct ProcGroupData { int scalapack_nnodes{}; int scalapack_nranks{}; - auto unpack() { return std::make_tuple(nnodes, spg_nnodes, ppn, spg_nranks); } + auto unpack() { return std::make_tuple(spg_nnodes, ppn, spg_nranks); } }; #if defined(USE_SCALAPACK) @@ -85,4 +84,4 @@ void setup_scalapack_info(tamm::ExecutionContext& ec, ChemEnv& chem_env, // Nbf, % of nodes, % of Nbf, nnodes from input file ProcGroupData get_spg_data(ExecutionContext& ec, const size_t N, const int node_p, - const int nbf_p = -1, const int node_inp = -1); + const int nbf_p = -1, const int node_inp = -1); \ No newline at end of file diff --git a/exachem/common/ecatom.cpp b/exachem/common/ecatom.cpp index 4734a8a..3f6e02f 100644 --- a/exachem/common/ecatom.cpp +++ 
b/exachem/common/ecatom.cpp @@ -24,4 +24,4 @@ int ECAtom::get_atomic_number(std::string element_symbol) { } return Z; -} \ No newline at end of file +} diff --git a/exachem/common/options/input_options.cpp b/exachem/common/options/input_options.cpp index 98ffb32..e5e8da3 100644 --- a/exachem/common/options/input_options.cpp +++ b/exachem/common/options/input_options.cpp @@ -219,6 +219,7 @@ void CommonOptions::print() { std::cout << " geom_units = " << geom_units << std::endl; txt_utils::print_bool(" debug ", debug); if(!file_prefix.empty()) std::cout << " file_prefix = " << file_prefix << std::endl; + if(!output_dir.empty()) std::cout << " output_dir = " << output_dir << std::endl; std::cout << "}" << std::endl; } diff --git a/exachem/common/options/input_options.hpp b/exachem/common/options/input_options.hpp index af65a4c..f382418 100644 --- a/exachem/common/options/input_options.hpp +++ b/exachem/common/options/input_options.hpp @@ -26,6 +26,7 @@ class CommonOptions { std::string gaussian_type{"spherical"}; std::string geom_units{"angstrom"}; std::string file_prefix{}; + std::string output_dir{}; std::string ext_data_path{}; void print(); }; diff --git a/exachem/common/options/parse_ccsd_options.cpp b/exachem/common/options/parse_ccsd_options.cpp index 231edca..23ea412 100644 --- a/exachem/common/options/parse_ccsd_options.cpp +++ b/exachem/common/options/parse_ccsd_options.cpp @@ -162,5 +162,6 @@ void ParseCCSDOptions::update_common_options(ChemEnv& chem_env) { cc_options.gaussian_type = common_options.gaussian_type; cc_options.geom_units = common_options.geom_units; cc_options.file_prefix = common_options.file_prefix; + cc_options.output_dir = common_options.output_dir; cc_options.ext_data_path = common_options.ext_data_path; } diff --git a/exachem/common/options/parse_ccsd_options.hpp b/exachem/common/options/parse_ccsd_options.hpp index 30a555a..f5d3263 100644 --- a/exachem/common/options/parse_ccsd_options.hpp +++ 
b/exachem/common/options/parse_ccsd_options.hpp @@ -23,4 +23,4 @@ class ParseCCSDOptions: public ParserUtils { ParseCCSDOptions(ChemEnv& chem_env); void operator()(ChemEnv& chem_env); void print(ChemEnv& chem_env); -}; +}; \ No newline at end of file diff --git a/exachem/common/options/parse_cd_options.hpp b/exachem/common/options/parse_cd_options.hpp index a0ede83..6a2e48d 100644 --- a/exachem/common/options/parse_cd_options.hpp +++ b/exachem/common/options/parse_cd_options.hpp @@ -21,4 +21,4 @@ class ParseCDOptions: public ParserUtils { ParseCDOptions(ChemEnv& chem_env); void operator()(ChemEnv& chem_env); void print(ChemEnv& chem_env); -}; +}; \ No newline at end of file diff --git a/exachem/common/options/parse_common_options.cpp b/exachem/common/options/parse_common_options.cpp index e6d2f7e..7c1c288 100644 --- a/exachem/common/options/parse_common_options.cpp +++ b/exachem/common/options/parse_common_options.cpp @@ -44,6 +44,7 @@ void ParseCommonOptions::parse(ChemEnv& chem_env) { parse_option(common_options.maxiter, jinput["common"], "maxiter"); parse_option(common_options.debug, jinput["common"], "debug"); parse_option(common_options.file_prefix, jinput["common"], "file_prefix"); + parse_option(common_options.output_dir, jinput["common"], "output_dir"); // parse cube options here for now parse_option(chem_env.ioptions.dplot_options.cube, jinput["DPLOT"], "cube"); diff --git a/exachem/common/options/parse_fci_options.cpp b/exachem/common/options/parse_fci_options.cpp index 9c7f035..4064572 100644 --- a/exachem/common/options/parse_fci_options.cpp +++ b/exachem/common/options/parse_fci_options.cpp @@ -84,5 +84,6 @@ void ParseFCIOptions::update_common_options(ChemEnv& chem_env) { fci_options.gaussian_type = common_options.gaussian_type; fci_options.geom_units = common_options.geom_units; fci_options.file_prefix = common_options.file_prefix; + fci_options.output_dir = common_options.output_dir; fci_options.ext_data_path = common_options.ext_data_path; } \ No 
newline at end of file diff --git a/exachem/common/options/parse_fci_options.hpp b/exachem/common/options/parse_fci_options.hpp index 377be36..cf87786 100644 --- a/exachem/common/options/parse_fci_options.hpp +++ b/exachem/common/options/parse_fci_options.hpp @@ -25,4 +25,4 @@ class ParseFCIOptions: public ParserUtils { // void print(ChemEnv& chem_env); }; -// Have to add print() functionls +// Have to add print() functionls \ No newline at end of file diff --git a/exachem/common/options/parse_gw_options.cpp b/exachem/common/options/parse_gw_options.cpp index 3a3f794..b56b4b6 100644 --- a/exachem/common/options/parse_gw_options.cpp +++ b/exachem/common/options/parse_gw_options.cpp @@ -50,5 +50,7 @@ void ParseGWOptions::update_common_options(ChemEnv& chem_env) { gw_options.gaussian_type = common_options.gaussian_type; gw_options.geom_units = common_options.geom_units; gw_options.file_prefix = common_options.file_prefix; + gw_options.output_dir = common_options.output_dir; + gw_options.ext_data_path = common_options.ext_data_path; } diff --git a/exachem/common/options/parse_gw_options.hpp b/exachem/common/options/parse_gw_options.hpp index 38f902d..f7cd600 100644 --- a/exachem/common/options/parse_gw_options.hpp +++ b/exachem/common/options/parse_gw_options.hpp @@ -23,4 +23,4 @@ class ParseGWOptions: public ParserUtils { ParseGWOptions(ChemEnv& chem_env); void operator()(ChemEnv& chem_env); void print(ChemEnv& chem_env); -}; +}; \ No newline at end of file diff --git a/exachem/common/options/parse_options.cpp b/exachem/common/options/parse_options.cpp index 5f57c02..b7c01e8 100644 --- a/exachem/common/options/parse_options.cpp +++ b/exachem/common/options/parse_options.cpp @@ -42,7 +42,8 @@ void ECOptionParser::parse_n_check(std::string_view filename, json& jinput) { "] in the input file"); } - const std::vector valid_common{"comments", "maxiter", "debug", "file_prefix"}; + const std::vector valid_common{"comments", "maxiter", "debug", "file_prefix", + "output_dir"}; 
for(auto& el: jinput["common"].items()) { if(std::find(valid_common.begin(), valid_common.end(), el.key()) == valid_common.end()) { tamm_terminate("INPUT FILE ERROR: Invalid common section option [" + el.key() + @@ -101,4 +102,4 @@ void ECOptionParser::parse_all_options(ChemEnv& chem_env) { ParseFCIOptions parse_fci_options(chem_env); ParseTaskOptions parse_task_options(chem_env); IniSystemData ini_sys_data(chem_env); -} +} \ No newline at end of file diff --git a/exachem/common/options/parse_options.hpp b/exachem/common/options/parse_options.hpp index f5b0a09..b57db62 100644 --- a/exachem/common/options/parse_options.hpp +++ b/exachem/common/options/parse_options.hpp @@ -29,4 +29,4 @@ class ECOptionParser: public ParserUtils { ECOptionParser(ChemEnv& chem_env); void initialize(ChemEnv& chem_env); void parse_all_options(ChemEnv& chem_env); -}; +}; \ No newline at end of file diff --git a/exachem/common/options/parse_scf_options.cpp b/exachem/common/options/parse_scf_options.cpp index f01d371..7ab5233 100644 --- a/exachem/common/options/parse_scf_options.cpp +++ b/exachem/common/options/parse_scf_options.cpp @@ -118,6 +118,11 @@ void ParseSCFOptions::parse(ChemEnv& chem_env) { if(scf_options.nnodes < 1 || scf_options.nnodes > 100) { tamm_terminate("INPUT FILE ERROR: SCF option nnodes should be a number between 1 and 100"); } + if(scf_options.multiplicity > 1 && scf_options.scf_type != "unrestricted") { + tamm_terminate( + "INPUT FILE ERROR: SCF option scf_type should be set to unrestricted when multiplicity>1"); + } + { auto xc_grid_str = scf_options.xc_grid_type; xc_grid_str.erase(remove_if(xc_grid_str.begin(), xc_grid_str.end(), isspace), @@ -143,5 +148,6 @@ void ParseSCFOptions::update_common_options(ChemEnv& chem_env) { scf_options.gaussian_type = common_options.gaussian_type; scf_options.geom_units = common_options.geom_units; scf_options.file_prefix = common_options.file_prefix; + scf_options.output_dir = common_options.output_dir; scf_options.ext_data_path = 
common_options.ext_data_path; } diff --git a/exachem/common/options/parse_task_options.cpp b/exachem/common/options/parse_task_options.cpp index b711495..dcb0458 100644 --- a/exachem/common/options/parse_task_options.cpp +++ b/exachem/common/options/parse_task_options.cpp @@ -67,5 +67,6 @@ void ParseTaskOptions::update_common_options(ChemEnv& chem_env) { task_options.gaussian_type = common_options.gaussian_type; task_options.geom_units = common_options.geom_units; task_options.file_prefix = common_options.file_prefix; + task_options.output_dir = common_options.output_dir; task_options.ext_data_path = common_options.ext_data_path; } diff --git a/exachem/common/options/parse_task_options.hpp b/exachem/common/options/parse_task_options.hpp index 7ff7c33..53bc804 100644 --- a/exachem/common/options/parse_task_options.hpp +++ b/exachem/common/options/parse_task_options.hpp @@ -22,4 +22,4 @@ class ParseTaskOptions: public ParserUtils { ParseTaskOptions(ChemEnv& chem_env); void operator()(ChemEnv& chem_env); void print(ChemEnv& chem_env); -}; +}; \ No newline at end of file diff --git a/exachem/common/txt_utils.cpp b/exachem/common/txt_utils.cpp index a9571fb..0c21027 100644 --- a/exachem/common/txt_utils.cpp +++ b/exachem/common/txt_utils.cpp @@ -31,4 +31,4 @@ bool txt_utils::strequal_case(const std::string& a, const std::string& b) { void txt_utils::print_bool(const std::string str, bool val) { std::cout << str << " = " << std::boolalpha << val << std::endl; -} \ No newline at end of file +} diff --git a/exachem/scf/scf_compute.cpp b/exachem/scf/scf_compute.cpp index e254800..837073f 100644 --- a/exachem/scf/scf_compute.cpp +++ b/exachem/scf/scf_compute.cpp @@ -144,11 +144,11 @@ void exachem::scf::SCFCompute::compute_sdens_to_cdens(const libint2::BasisSet& s std::vector& CtoS = etensors.trafo_ctos; auto shell2bf = shells.shell2bf(); int nsh = shells.size(); - int Nspher{0}; - int Ncart{0}; + // int Nspher{0}; + int Ncart{0}; for(auto& shell: shells) { int l = 
shell.contr[0].l; - Nspher += 2 * l + 1; + // Nspher += 2 * l + 1; Ncart += ((l + 1) * (l + 2)) / 2; } @@ -190,11 +190,11 @@ void exachem::scf::SCFCompute::compute_cpot_to_spot(const libint2::BasisSet& she auto shell2bf = shells.shell2bf(); int nsh = shells.size(); int Nspher{0}; - int Ncart{0}; + // int Ncart{0}; for(auto& shell: shells) { int l = shell.contr[0].l; Nspher += 2 * l + 1; - Ncart += ((l + 1) * (l + 2)) / 2; + // Ncart += ((l + 1) * (l + 2)) / 2; } Spherical = Matrix::Zero(Nspher, Nspher); diff --git a/exachem/scf/scf_hartree_fock.cpp b/exachem/scf/scf_hartree_fock.cpp index 801f125..6cf4814 100644 --- a/exachem/scf/scf_hartree_fock.cpp +++ b/exachem/scf/scf_hartree_fock.cpp @@ -746,7 +746,7 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e #if SCF_THROTTLE_RESOURCES pgdata = get_spg_data(exc, N, -1, 50, chem_env.ioptions.scf_options.nnodes); - auto [t_nnodes, hf_nnodes, ppn, hf_nranks] = pgdata.unpack(); + auto [hf_nnodes, ppn, hf_nranks] = pgdata.unpack(); if(rank == 0) { std::cout << "\n Number of nodes, processes per node used for SCF calculation: " << hf_nnodes << ", " << ppn << std::endl; @@ -756,11 +756,11 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e gcomm = exc.pg().team(); hf_comm = new upcxx::team(gcomm->split(in_new_team ? 
0 : upcxx::team::color_none, rank.value())); #else - int ranks[hf_nranks]; + std::vector ranks(hf_nranks); for(int i = 0; i < hf_nranks; i++) ranks[i] = i; auto gcomm = exc.pg().comm(); MPI_Comm_group(gcomm, &wgroup); - MPI_Group_incl(wgroup, hf_nranks, ranks, &hfgroup); + MPI_Group_incl(wgroup, hf_nranks, ranks.data(), &hfgroup); MPI_Comm_create(gcomm, hfgroup, &hf_comm); MPI_Group_free(&wgroup); MPI_Group_free(&hfgroup); diff --git a/exachem/scf/scf_hartree_fock.hpp b/exachem/scf/scf_hartree_fock.hpp index f2a562d..0051c88 100644 --- a/exachem/scf/scf_hartree_fock.hpp +++ b/exachem/scf/scf_hartree_fock.hpp @@ -93,7 +93,6 @@ class SCFHartreeFock { #if SCF_THROTTLE_RESOURCES ProcGroupData pgdata; ProcGroup pg; - int hf_nranks, hf_nnodes, t_nnodes; #endif #if defined(USE_SCALAPACK) diff --git a/exachem/task/ec_task.cpp b/exachem/task/ec_task.cpp index 5c46094..c62fa29 100644 --- a/exachem/task/ec_task.cpp +++ b/exachem/task/ec_task.cpp @@ -10,7 +10,7 @@ namespace exachem::task { -void ec_execute_task(ExecutionContext& ec, ChemEnv& chem_env, std::string ec_arg2) { +void execute_task(ExecutionContext& ec, ChemEnv& chem_env, std::string ec_arg2) { const auto task = chem_env.ioptions.task_options; const auto input_file = chem_env.input_file; @@ -54,4 +54,4 @@ void ec_execute_task(ExecutionContext& ec, ChemEnv& chem_env, std::string ec_arg "[ERROR] Unsupported task specified (or) code for the specified task is not built"); } -} // namespace exachem::task \ No newline at end of file +} // namespace exachem::task diff --git a/exachem/task/ec_task.hpp b/exachem/task/ec_task.hpp index c5eb1ec..f94be1e 100644 --- a/exachem/task/ec_task.hpp +++ b/exachem/task/ec_task.hpp @@ -35,5 +35,5 @@ using namespace exachem; #endif namespace exachem::task { -void ec_execute_task(ExecutionContext& ec, ChemEnv& chem_env, std::string ec_arg2); -} \ No newline at end of file +void execute_task(ExecutionContext& ec, ChemEnv& chem_env, std::string ec_arg2); +} diff --git a/inputs/example.json 
b/inputs/example.json index 1d3bcce..7fbfd56 100644 --- a/inputs/example.json +++ b/inputs/example.json @@ -17,6 +17,7 @@ "maxiter": 50, "debug": false, "file_prefix": "", + "output_dir": "", "comments": { "comment1": "example comment" } diff --git a/inputs/guanine_cytosine_3bp.json b/inputs/guanine_cytosine_3bp.json index 07b451b..25db8e2 100644 --- a/inputs/guanine_cytosine_3bp.json +++ b/inputs/guanine_cytosine_3bp.json @@ -98,7 +98,6 @@ "maxiter": 50 }, "SCF": { - "tol_lindep": 1e-10, "conve": 1e-06, "convd": 1e-05, "diis_hist": 10, diff --git a/inputs/h2.json b/inputs/h2.json index 8bbd3aa..cf9d096 100644 --- a/inputs/h2.json +++ b/inputs/h2.json @@ -23,7 +23,7 @@ "diagtol": 1e-08 }, "CC": { - "threshold": 1e-10, + "threshold": 1e-8, "GFCCSD": { "gf_ip": true, "gf_ea": false, diff --git a/inputs/ozone.json b/inputs/ozone.json index e458450..621e053 100644 --- a/inputs/ozone.json +++ b/inputs/ozone.json @@ -15,12 +15,10 @@ "restart": false }, "CD": { - "diagtol": 1e-06, - "max_cvecs": 8 + "diagtol": 1e-06 }, "CC": { - "threshold": 1e-10, - "tilesize": 30, + "threshold": 1e-8, "debug": false, "EOMCCSD": { "eom_nroots": 2, @@ -30,10 +28,10 @@ } }, "TASK": { - "scf": true, + "scf": false, "mp2": false, "cd_2e": false, - "ccsd": false, + "ccsd": true, "ccsd_t": false } } diff --git a/methods/ExaChem.cpp b/methods/ExaChem.cpp index 4f2c602..9880a37 100644 --- a/methods/ExaChem.cpp +++ b/methods/ExaChem.cpp @@ -79,13 +79,22 @@ int main(int argc, char* argv[]) { ECOptions& ioptions = chem_env.ioptions; chem_env.sys_data.input_molecule = ParserUtils::getfilename(input_file); + std::string output_dir = chem_env.ioptions.common_options.output_dir; if(chem_env.ioptions.common_options.file_prefix.empty()) { chem_env.ioptions.common_options.file_prefix = chem_env.sys_data.input_molecule; } + if(!output_dir.empty()) { + output_dir += "/"; + const auto test_file = output_dir + "ec_test_file.tmp"; + std::ofstream ofs(test_file); + if(!ofs) + tamm_terminate("Path provided as 
output_dir [" + + chem_env.ioptions.common_options.output_dir + "] is not writable!"); + } chem_env.sys_data.output_file_prefix = chem_env.ioptions.common_options.file_prefix + "." + chem_env.ioptions.common_options.basis; - chem_env.workspace_dir = chem_env.sys_data.output_file_prefix + "_files/"; + chem_env.workspace_dir = output_dir + chem_env.sys_data.output_file_prefix + "_files/"; if(rank == 0) { std::cout << chem_env.jinput.dump(2) << std::endl; @@ -154,7 +163,7 @@ int main(int argc, char* argv[]) { chem_env.shells = chem_env.ec_basis.shells; chem_env.sys_data.has_ecp = chem_env.ec_basis.has_ecp; - exachem::task::ec_execute_task(ec, chem_env, ec_arg2); + exachem::task::execute_task(ec, chem_env, ec_arg2); } // loop over input files