Skip to content

Commit

Permalink
Add output_dir option.
Browse files Browse the repository at this point in the history
Cholesky on sub-group.
Fix clang warnings.
Minor improvements to CC tilesize logic.
  • Loading branch information
ajaypanyala committed Oct 27, 2024
1 parent 7da1e0f commit 03fda28
Show file tree
Hide file tree
Showing 44 changed files with 521 additions and 411 deletions.
5 changes: 4 additions & 1 deletion docs/schema/input_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@
"debug": {
"type": "boolean"
},
"output_file_prefix": {
"file_prefix": {
"type": "string"
},
"output_dir": {
"type": "string"
},
"comments": {
"type": "object",
"properties": {
Expand Down
2 changes: 2 additions & 0 deletions docs/user_guide/basic_options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Some common options supported are as follows:
:file_prefix: A string indicating the prefix for the name of the workspace folder where the results of a run are stored.
It also forms the prefix for the files written to the workspace folder. The *default prefix* is the name of the input file without the *.json* extension.

:output_dir: A string indicating the path to the directory where the results of a run are stored. If not provided, the default is the current working directory.

.. _Basis:

Basis set options
Expand Down
7 changes: 6 additions & 1 deletion exachem/cc/ccsd/cd_ccsd_cs_ann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ std::tuple<double, double> exachem::cc::ccsd::cd_ccsd_cs_driver(

iteration_print(chem_env, ec.pg(), iter, residual, energy, iter_time);

if(writet && (((iter + 1) % writet_iter == 0) || (residual < thresh))) {
if(writet && ((iter + 1) % writet_iter == 0)) {
write_to_disk(t1_aa, t1file);
write_to_disk(t2_abab, t2file);
}
Expand All @@ -792,6 +792,11 @@ std::tuple<double, double> exachem::cc::ccsd::cd_ccsd_cs_driver(
diis<T>(ec, rs, ts, next_t);
}

if(writet) {
write_to_disk(t1_aa, t1file);
write_to_disk(t2_abab, t2file);
}

if(profile && ec.print()) {
std::string profile_csv = ccsd_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
Expand Down
19 changes: 10 additions & 9 deletions exachem/cc/ccsd/cd_ccsd_os_ann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace

iteration_print(chem_env, ec.pg(), iter, residual, energy, iter_time);

if(writet && (((iter + 1) % writet_iter == 0) /*|| (residual < thresh)*/)) {
if(writet && ((iter + 1) % writet_iter == 0)) {
write_to_disk(d_t1, t1file);
write_to_disk(d_t2, t2file);
}
Expand All @@ -1102,14 +1102,6 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace
.deallocate(t2_copy)
.execute();
// clang-format on
if(writet) {
write_to_disk(d_t1, t1file);
write_to_disk(d_t2, t2file);
if(computeTData && chem_env.ioptions.ccsd_options.writev) {
fs::copy_file(t1file, ccsd_fp + ".fullT1amp", fs::copy_options::update_existing);
fs::copy_file(t2file, ccsd_fp + ".fullT2amp", fs::copy_options::update_existing);
}
}
break;
}
}
Expand All @@ -1129,6 +1121,15 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace
diis<T>(ec, rs, ts, next_t);
}

if(writet) {
write_to_disk(d_t1, t1file);
write_to_disk(d_t2, t2file);
if(computeTData && chem_env.ioptions.ccsd_options.writev) {
fs::copy_file(t1file, ccsd_fp + ".fullT1amp", fs::copy_options::update_existing);
fs::copy_file(t2file, ccsd_fp + ".fullT2amp", fs::copy_options::update_existing);
}
}

if(profile && ec.print()) {
std::string profile_csv = ccsd_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
Expand Down
4 changes: 2 additions & 2 deletions exachem/cc/ccsd_t/ccsd_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void exachem::cc::ccsd_t::ccsd_t_driver(ExecutionContext& ec, ChemEnv& chem_env)
int ga_cnn = ec.nnodes();
if(nsranks > ga_cnn) nsranks = ga_cnn;
nsranks = nsranks * GA_Cluster_nprocs(0);
int subranks[nsranks];
std::vector<int> subranks(nsranks);
for(int i = 0; i < nsranks; i++) subranks[i] = i;

#if defined(USE_UPCXX)
Expand All @@ -69,7 +69,7 @@ void exachem::cc::ccsd_t::ccsd_t_driver(ExecutionContext& ec, ChemEnv& chem_env)
MPI_Group world_group;
MPI_Comm_group(world_comm, &world_group);
MPI_Group subgroup;
MPI_Group_incl(world_group, nsranks, subranks, &subgroup);
MPI_Group_incl(world_group, nsranks, subranks.data(), &subgroup);
MPI_Comm subcomm;
MPI_Comm_create(world_comm, subgroup, &subcomm);
MPI_Group_free(&world_group);
Expand Down
24 changes: 12 additions & 12 deletions exachem/cc/ccsd_t/ccsd_t_all_fused_doubles1.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index
} // end ia6

// ia6 -- compute sizes and permutations
int idx_offset = 0;
int detailed_stats[noab][9];
int idx_offset = 0;
std::vector<std::array<int, 9>> detailed_stats(noab);

for(Index idx_noab = 0; idx_noab < noab; idx_noab++) {
detailed_stats[idx_noab][0] = 0;
Expand Down Expand Up @@ -394,7 +394,7 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index
detailed_stats[h7b][7] = 0;
detailed_stats[h7b][8] = 0;

int idx_new_offset = 0;
// int idx_new_offset = 0;
if(!ia6_enabled[ia6 * noab + h7b]) continue;

size_t dim_common = k_range[h7b];
Expand All @@ -405,55 +405,55 @@ void ccsd_t_data_d1_info_only(bool is_restricted, const Index noab, const Index
if(ref_p456_h123 == cur_p456_h123) {
df_simple_d1_exec[0 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][0] = detailed_stats[h7b][0] + 1;
}
if(ref_p456_h123 == cur_p456_h213) {
df_simple_d1_exec[1 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][1] = detailed_stats[h7b][1] + 1;
}
if(ref_p456_h123 == cur_p456_h231) {
df_simple_d1_exec[2 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][2] = detailed_stats[h7b][2] + 1;
}
if(ref_p456_h123 == cur_p645_h123) {
df_simple_d1_exec[3 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][3] = detailed_stats[h7b][3] + 1;
}
if(ref_p456_h123 == cur_p645_h213) {
df_simple_d1_exec[4 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][4] = detailed_stats[h7b][4] + 1;
}
if(ref_p456_h123 == cur_p645_h231) {
df_simple_d1_exec[5 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][5] = detailed_stats[h7b][5] + 1;
}
if(ref_p456_h123 == cur_p465_h123) {
df_simple_d1_exec[6 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][6] = detailed_stats[h7b][6] + 1;
}
if(ref_p456_h123 == cur_p465_h213) {
df_simple_d1_exec[7 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][7] = detailed_stats[h7b][7] + 1;
}
if(ref_p456_h123 == cur_p465_h231) {
df_simple_d1_exec[8 + h7b * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[h7b][8] = detailed_stats[h7b][8] + 1;
}

Expand Down
24 changes: 12 additions & 12 deletions exachem/cc/ccsd_t/ccsd_t_all_fused_doubles2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,8 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index
} // end ia6

// ia6 -- compute sizes and permutations
int idx_offset = 0;
int detailed_stats[nvab][9];
int idx_offset = 0;
std::vector<std::array<int, 9>> detailed_stats(nvab);

for(Index idx_nvab = 0; idx_nvab < nvab; idx_nvab++) {
detailed_stats[idx_nvab][0] = 0;
Expand Down Expand Up @@ -447,7 +447,7 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index
detailed_stats[p7b - noab][7] = 0;
detailed_stats[p7b - noab][8] = 0;

int idx_new_offset = 0;
// int idx_new_offset = 0;
if(!ia6_enabled[ia6 * nvab + p7b - noab]) continue;

size_t dim_common = k_range[p7b];
Expand All @@ -458,55 +458,55 @@ void ccsd_t_data_d2_info_only(bool is_restricted, const Index noab, const Index
if(ref_p456_h123 == cur_p456_h123) {
df_simple_d2_exec[0 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][0] = detailed_stats[p7b - noab][0] + 1;
}
if(ref_p456_h123 == cur_p456_h312) {
df_simple_d2_exec[1 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][1] = detailed_stats[p7b - noab][1] + 1;
}
if(ref_p456_h123 == cur_p456_h132) {
df_simple_d2_exec[2 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][2] = detailed_stats[p7b - noab][2] + 1;
}
if(ref_p456_h123 == cur_p546_h123) {
df_simple_d2_exec[3 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][3] = detailed_stats[p7b - noab][3] + 1;
}
if(ref_p456_h123 == cur_p546_h312) {
df_simple_d2_exec[4 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][4] = detailed_stats[p7b - noab][4] + 1;
}
if(ref_p456_h123 == cur_p546_h132) {
df_simple_d2_exec[5 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][5] = detailed_stats[p7b - noab][5] + 1;
}
if(ref_p456_h123 == cur_p564_h123) {
df_simple_d2_exec[6 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][6] = detailed_stats[p7b - noab][6] + 1;
}
if(ref_p456_h123 == cur_p564_h312) {
df_simple_d2_exec[7 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][7] = detailed_stats[p7b - noab][7] + 1;
}
if(ref_p456_h123 == cur_p564_h132) {
df_simple_d2_exec[8 + (p7b - noab) * 9] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[p7b - noab][8] = detailed_stats[p7b - noab][8] + 1;
}

Expand Down
22 changes: 11 additions & 11 deletions exachem/cc/ccsd_t/ccsd_t_all_fused_singles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,8 @@ void ccsd_t_data_s1_info_only(bool is_restricted, const Index noab, const Index
detailed_stats[8] = 0;

// ia6 -- compute sizes and permutations
int idx_offset = 0;
int idx_new_offset = 0;
int idx_offset = 0;
// int idx_new_offset = 0;
for(auto ia6 = 0; ia6 < 9; ia6++) {
if(!ia6_enabled[ia6]) { continue; }

Expand All @@ -474,55 +474,55 @@ void ccsd_t_data_s1_info_only(bool is_restricted, const Index noab, const Index
if(ref_p456_h123 == cur_p456_h123) {
df_simple_s1_exec[0] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[0] = detailed_stats[0] + 1;
}
if(ref_p456_h123 == cur_p456_h213) {
df_simple_s1_exec[1] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[1] = detailed_stats[1] + 1;
}
if(ref_p456_h123 == cur_p456_h231) {
df_simple_s1_exec[2] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[2] = detailed_stats[2] + 1;
}
if(ref_p456_h123 == cur_p546_h123) {
df_simple_s1_exec[3] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[3] = detailed_stats[3] + 1;
}
if(ref_p456_h123 == cur_p546_h213) {
df_simple_s1_exec[4] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[4] = detailed_stats[4] + 1;
}
if(ref_p456_h123 == cur_p546_h231) {
df_simple_s1_exec[5] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[5] = detailed_stats[5] + 1;
}
if(ref_p456_h123 == cur_p564_h123) {
df_simple_s1_exec[6] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[6] = detailed_stats[6] + 1;
}
if(ref_p456_h123 == cur_p564_h213) {
df_simple_s1_exec[7] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[7] = detailed_stats[7] + 1;
}
if(ref_p456_h123 == cur_p564_h231) {
df_simple_s1_exec[8] = idx_offset;
*num_enabled_kernels = *num_enabled_kernels + 1;
idx_new_offset++;
// idx_new_offset++;
detailed_stats[8] = detailed_stats[8] + 1;
}
//
Expand Down
4 changes: 2 additions & 2 deletions exachem/cc/ccsd_t/ccsd_t_fused_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ std::tuple<T, T, double, double> ccsd_t_fused_driver_new(
static_cast<T*>(memDevPool.allocate(sizeof(T) * std::pow(max_num_blocks, 6) * 2));
#endif

int num_task = 0;
// int num_task = 0;
// if(!seq_h3b) {
// if(rank == 0) {
// std::cout << "456123 parallel 6d loop variant" << std::endl << std::endl;
Expand Down Expand Up @@ -354,7 +354,7 @@ std::tuple<T, T, double, double> ccsd_t_fused_driver_new(
else if((t_h1b == t_h2b) || (t_h2b == t_h3b)) { factor /= 2.0; }

//
num_task++;
// num_task++;

#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP)
ccsd_t_fully_fused_none_df_none_task<T>(
Expand Down
8 changes: 4 additions & 4 deletions exachem/cc/gfcc/gfccsd_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2534,16 +2534,16 @@ void gfccsd_driver(ExecutionContext& ec, ChemEnv& chem_env) {

int nsranks = sys_data.nbf / 15;
if(nsranks < 1) nsranks = 1;
int ga_cnn = GA_Cluster_nnodes();
int ga_cnn = ec.nnodes();
if(nsranks > ga_cnn) nsranks = ga_cnn;
nsranks = nsranks * GA_Cluster_nprocs(0);
int subranks[nsranks];
nsranks = nsranks * ec.ppn();
std::vector<int> subranks(nsranks);
for(int i = 0; i < nsranks; i++) subranks[i] = i;
auto world_comm = ec.pg().comm();
MPI_Group world_group;
MPI_Comm_group(world_comm, &world_group);
MPI_Group subgroup;
MPI_Group_incl(world_group, nsranks, subranks, &subgroup);
MPI_Group_incl(world_group, nsranks, subranks.data(), &subgroup);
MPI_Comm subcomm;
MPI_Comm_create(world_comm, subgroup, &subcomm);
MPI_Group_free(&subgroup);
Expand Down
4 changes: 2 additions & 2 deletions exachem/cc/scripts/ccsd_advisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,9 @@ def parseargs(argv=None):


VabOab = v_alpha*o_beta*v_beta*o_alpha
ts_guess=50
ts_guess=40
ts_max=ts_guess
tilesizes = list(range(ts_guess, 301, 10))
tilesizes = list(range(ts_guess, 181, 10))

def get_ts_recommendation(tilesizes,nranks):
ts_guess_ = tilesizes[0]
Expand Down
Loading

0 comments on commit 03fda28

Please sign in to comment.