Skip to content

Commit

Permalink
remove duplicated code in parser
Browse files Browse the repository at this point in the history
[CD] add skip_cd option, change write_cv option syntax
[CC] cleanup profiling header
  • Loading branch information
ajaypanyala committed Apr 28, 2024
1 parent 9f000f2 commit e65360c
Show file tree
Hide file tree
Showing 28 changed files with 134 additions and 392 deletions.
3 changes: 1 addition & 2 deletions ci/reference_output/ch4.def2-tzvp.ccsd_t.json
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,7 @@
"CD": {
"diagtol": 1e-12,
"itilesize": 1000,
"write_cv": false,
"write_vcount": 5000,
"write_cv": [false, 5000],
"max_cvecs_factor": 40
},
"CCSD": {
Expand Down
3 changes: 1 addition & 2 deletions ci/reference_output/co.cc-pvdz.gfccsd.json
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,7 @@
"CD": {
"diagtol": 1e-08,
"itilesize": 1000,
"write_cv": false,
"write_vcount": 5000,
"write_cv": [false, 5000],
"max_cvecs_factor": 12
},
"CCSD": {
Expand Down
3 changes: 1 addition & 2 deletions ci/reference_output/h2o_ducc.cc-pvdz.ducc.json
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,7 @@
"CD": {
"diagtol": 1e-12,
"itilesize": 1000,
"write_cv": false,
"write_vcount": 5000,
"write_cv": [false, 5000],
"max_cvecs_factor": 40
},
"CCSD": {
Expand Down
3 changes: 1 addition & 2 deletions ci/reference_output/h2o_eom.cc-pvdz.eomccsd.json
Original file line number Diff line number Diff line change
Expand Up @@ -622,8 +622,7 @@
"CD": {
"diagtol": 1e-12,
"itilesize": 1000,
"write_cv": false,
"write_vcount": 5000,
"write_cv": [false, 5000],
"max_cvecs_factor": 40
},
"CCSD": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@
"CD": {
"diagtol": 1e-05,
"itilesize": 1000,
"write_cv": true,
"write_vcount": 5000,
"write_cv": [true, 5000],
"max_cvecs_factor": 12
},
"CCSD": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@
"CD": {
"diagtol": 1e-05,
"itilesize": 1000,
"write_cv": true,
"write_vcount": 5000,
"write_cv": [true, 5000],
"max_cvecs_factor": 12
},
"CCSD": {
Expand Down
18 changes: 12 additions & 6 deletions docs/schema/input_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@
"writem": {
"type": "number"
},
"riscf": {
"type": "string"
},
"restart": {
"type": "boolean"
},
"noscf": {
"type": "boolean"
},
"direct_df": {
"type": "boolean"
},
"guess": {
"type": "array",
"sad": {
Expand Down Expand Up @@ -211,11 +211,17 @@
"diagtol": {
"type": "number"
},
"write_vcount": {
"type": "number"
"skip_cd": {
"type": "array",
"items": {
"type": "string"
}
},
"write_cv": {
"type": "boolean"
"type": "array",
"items": {
"type": "string"
}
},
"itilesize": {
"type": "number"
Expand Down
8 changes: 3 additions & 5 deletions docs/user_guide/cholesky_decomposition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ Options used in the Cholesky decomposition of atomic-orbital based two-electron
"CD": {
"diagtol": 1e-5,
"itilesize": 1000,
"write_cv": false,
"write_vcount": 5000
"write_cv": [false,5000]
}
:diagtol: ``[default=1e-5]`` The diagonal threshold used to terminate the decomposition procedure and truncate the Cholesky vectors.
Expand All @@ -34,7 +33,6 @@ Options used in the Cholesky decomposition of atomic-orbital based two-electron

The following options are applicable only for calculations involving :math:`>` 1000 basis functions. They are used for restarting the Cholesky decomposition procedure.

:write_cv: ``[default=false]`` When enabled, it performs parallel IO to write the tensor containing the AO cholesky vectors to disk. Enabling this option implies restart.

:write_vcount: ``[default=5000]`` The cholesky vectors are written to disk after every *write_vcount* vectors are computed.
:write_cv: ``[default=[false,5000]]`` A two-element array of the form ``[enable, count]``. When *enable* is ``true``, parallel IO is used to write the tensor containing the AO Cholesky vectors to disk. Enabling this option implies restart.
The integer *count* specifies the write interval: the Cholesky vectors are written to disk after every *count* vectors are computed.

5 changes: 1 addition & 4 deletions exachem/cc/cc2/cd_cc2_cs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,10 +517,7 @@ std::tuple<double, double> cc2_cs::cd_cc2_cs_driver(
std::string profile_csv = cd_cc2_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl;
std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;";
header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;";
header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg";
pds << header << std::endl;
pds << ec.get_profile_header() << std::endl;
pds << ec.get_profile_data().str() << std::endl;
pds.close();
}
Expand Down
5 changes: 1 addition & 4 deletions exachem/cc/cc2/cd_cc2_os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -688,10 +688,7 @@ std::tuple<double, double> cc2_os::cd_cc2_os_driver(
std::string profile_csv = cc2_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl;
std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;";
header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;";
header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg";
pds << header << std::endl;
pds << ec.get_profile_header() << std::endl;
pds << ec.get_profile_data().str() << std::endl;
pds.close();
}
Expand Down
36 changes: 25 additions & 11 deletions exachem/cc/ccsd/ccsd_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,21 +512,34 @@ exachem::cd_svd::cd_svd_driver(ChemEnv& chem_env, ExecutionContext& ec, TiledInd
std::string files_dir = out_fp + chem_env.ioptions.scf_options.scf_type;
std::string lcaofile = files_dir + "/" + sys_data.output_file_prefix + ".lcao";

if(!readv2) {
auto skip_cd = cd_options.skip_cd;

if(!readv2 && !skip_cd.first) {
two_index_transform(chem_env, ec, C_AO, F_AO, C_beta_AO, F_beta_AO, d_f1, shells, lcao,
is_dlpno || !is_mso);
if(!is_dlpno)
cholVpr = cd_svd(chem_env, ec, MO, AO, chol_count, max_cvecs, shells, lcao, is_mso);
write_to_disk<TensorType>(lcao, lcaofile);
}
else {
std::ifstream in(cholfile, std::ios::in);
int rstatus = 0;
if(in.is_open()) rstatus = 1;
if(rstatus == 1) in >> chol_count;
else tamm_terminate("Error reading " + cholfile);

if(rank == 0) cout << "Number of cholesky vectors to be read = " << chol_count << endl;
if(!skip_cd.first) {
std::ifstream in(cholfile, std::ios::in);
int rstatus = 0;
if(in.is_open()) rstatus = 1;
if(rstatus == 1) in >> chol_count;
else tamm_terminate("Error reading " + cholfile);

if(rank == 0)
cout << "Number of cholesky vectors to be read from disk = " << chol_count << endl;
}
else {
chol_count = skip_cd.second;
if(rank == 0)
cout << endl
<< "Skipping Cholesky Decomposition... using user provided cholesky vector count of "
<< chol_count << endl
<< endl;
}

if(!is_dlpno) update_sysdata(chem_env, MO, is_mso);

Expand All @@ -537,17 +550,18 @@ exachem::cd_svd::cd_svd_driver(ChemEnv& chem_env, ExecutionContext& ec, TiledInd
cholVpr = {{N, N, CI}, {SpinPosition::upper, SpinPosition::lower, SpinPosition::ignore}};
if(!is_dlpno) Tensor<TensorType>::allocate(&ec, cholVpr);
// Scheduler{ec}(cholVpr()=0).execute();
read_from_disk(lcao, lcaofile);
if(!skip_cd.first) read_from_disk(lcao, lcaofile);
}

auto hf_t2 = std::chrono::high_resolution_clock::now();
double cd_svd_time =
std::chrono::duration_cast<std::chrono::duration<double>>((hf_t2 - hf_t1)).count();

if(rank == 0)
if(rank == 0 && !skip_cd.first)
std::cout << std::endl
<< "Total Time taken for Cholesky Decomposition: " << std::fixed
<< std::setprecision(2) << cd_svd_time << " secs" << std::endl;
<< std::setprecision(2) << cd_svd_time << " secs" << std::endl
<< std::endl;

Tensor<T>::deallocate(C_AO, F_AO);
if(sys_data.is_unrestricted) Tensor<T>::deallocate(C_beta_AO, F_beta_AO);
Expand Down
31 changes: 18 additions & 13 deletions exachem/cc/ccsd/cd_ccsd_cs_ann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,9 @@ void exachem::cc::ccsd::ccsd_t2_cs(Scheduler& sch, const TiledIndexSpace& MO,
auto btensor = rhs2_.tensor();
// for(auto itval=loop_nest.begin(); itval!=loop_nest.end(); ++itval) {}

auto compute_v4_term = [=](const IndexVector& cblkid, span<T> cbuf) {
auto& oprof = tamm::OpProfiler::instance();

auto compute_v4_term = [=, &oprof](const IndexVector& cblkid, span<T> cbuf) {
auto& memHostPool = tamm::RMMMemoryManager::getInstance().getHostMemoryPool();

// compute blockids from the loop indices. itval is the loop index
Expand Down Expand Up @@ -404,18 +406,21 @@ void exachem::cc::ccsd::ccsd_t2_cs(Scheduler& sch, const TiledIndexSpace& MO,
static_cast<TensorElType2*>(memDevicePool.allocate(asize * sizeof(TensorElType2)));
bbuf_dev =
static_cast<TensorElType3*>(memDevicePool.allocate(bsize * sizeof(TensorElType3)));

TimerGuard tg_copy{&oprof.multOpCopyTime};
gpuMemcpyAsync<TensorElType2>(abuf_dev, abuf, asize, gpuMemcpyHostToDevice, thandle);
gpuMemcpyAsync<TensorElType3>(bbuf_dev, bbuf, bsize, gpuMemcpyHostToDevice, thandle);
}
#endif

kernels::block_multiply<T, TensorElType1, TensorElType2, TensorElType3>(
{
TimerGuard tg_dgemm{&oprof.multOpDgemmTime};
kernels::block_multiply<T, TensorElType1, TensorElType2, TensorElType3>(
#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP)
abuf_dev, bbuf_dev,
abuf_dev, bbuf_dev,
#endif
thandle, 1.0, abuf, adims_sz, rhs1_int_labels_, bbuf, bdims_sz, rhs2_int_labels_, cscale,
cbuf.data(), cdims_sz, lhs_int_labels_, hw, false, cbuf_dev_ptr, cbuf_tmp_dev_ptr);
thandle, 1.0, abuf, adims_sz, rhs1_int_labels_, bbuf, bdims_sz, rhs2_int_labels_,
cscale, cbuf.data(), cdims_sz, lhs_int_labels_, hw, false, cbuf_dev_ptr,
cbuf_tmp_dev_ptr);
}

#if(defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP))
if(hw == ExecutionHW::GPU) {
Expand All @@ -437,8 +442,11 @@ void exachem::cc::ccsd::ccsd_t2_cs(Scheduler& sch, const TiledIndexSpace& MO,
TensorElType1* cbuf_tmp{nullptr};
cbuf_tmp = static_cast<TensorElType1*>(memHostPool.allocate(csize * sizeof(TensorElType1)));
std::memset(cbuf_tmp, 0, csize * sizeof(TensorElType1));
gpuMemcpyAsync<TensorElType1>(cbuf_tmp, cbuf_dev_ptr, csize, gpuMemcpyDeviceToHost,
thandle);
{
TimerGuard tg_copy{&oprof.multOpCopyTime};
gpuMemcpyAsync<TensorElType1>(cbuf_tmp, cbuf_dev_ptr, csize, gpuMemcpyDeviceToHost,
thandle);
}
// cbuf+=cbuf_tmp
gpuStreamSynchronize(thandle);
blas::axpy(csize, TensorElType1{1}, cbuf_tmp, 1, cbuf.data(), 1);
Expand Down Expand Up @@ -786,10 +794,7 @@ std::tuple<double, double> exachem::cc::ccsd::cd_ccsd_cs_driver(
std::string profile_csv = ccsd_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl;
std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;";
header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;";
header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg";
pds << header << std::endl;
pds << ec.get_profile_header() << std::endl;
pds << ec.get_profile_data().str() << std::endl;
pds.close();
}
Expand Down
31 changes: 18 additions & 13 deletions exachem/cc/ccsd/cd_ccsd_os_ann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,10 @@ void ccsd_t2_os(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace
// "_a022( abab )(p3_va,p4_vb,p2_va,p1_vb) = 1.0 * _a021_os( aa )(p3_va,p2_va,cind) *
// _a021_os( bb )(p4_vb,p1_vb,cind)")

int a22_flag = 0;
auto compute_v4_term = [=, &a22_flag](const IndexVector& cblkid, span<T> cbuf) {
int a22_flag = 0;
auto& oprof = tamm::OpProfiler::instance();

auto compute_v4_term = [=, &a22_flag, &oprof](const IndexVector& cblkid, span<T> cbuf) {
Tensor<T> a22_tmp;
LabeledTensor<T>*lhsp_{nullptr}, *rhs1p_{nullptr}, *rhs2p_{nullptr};

Expand Down Expand Up @@ -520,13 +522,16 @@ void ccsd_t2_os(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace
abptr->abuf_ = abuf;
abptr->bbuf_ = bbuf;

kernels::block_multiply<T, TensorElType1, TensorElType2, TensorElType3>(
{
TimerGuard tg_dgemm{&oprof.multOpDgemmTime};
kernels::block_multiply<T, TensorElType1, TensorElType2, TensorElType3>(
#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP)
abptr->ta_, abptr->tb_,
abptr->ta_, abptr->tb_,
#endif
thandle, 1.0, abptr->abuf_, adims_sz, rhs1_int_labels_, abptr->bbuf_, bdims_sz,
rhs2_int_labels_, cscale, cbuf.data(), cdims_sz, lhs_int_labels_, hw, false, cbuf_dev_ptr,
cbuf_tmp_dev_ptr);
thandle, 1.0, abptr->abuf_, adims_sz, rhs1_int_labels_, abptr->bbuf_, bdims_sz,
rhs2_int_labels_, cscale, cbuf.data(), cdims_sz, lhs_int_labels_, hw, false,
cbuf_dev_ptr, cbuf_tmp_dev_ptr);
}

#if(defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP))
if(hw == ExecutionHW::GPU) {
Expand All @@ -548,8 +553,11 @@ void ccsd_t2_os(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace
TensorElType1* cbuf_tmp{nullptr};
cbuf_tmp = static_cast<TensorElType1*>(memHostPool.allocate(csize * sizeof(TensorElType1)));
std::memset(cbuf_tmp, 0, csize * sizeof(TensorElType1));
gpuMemcpyAsync<TensorElType1>(cbuf_tmp, cbuf_dev_ptr, csize, gpuMemcpyDeviceToHost,
thandle);
{
TimerGuard tg_copy{&oprof.multOpCopyTime};
gpuMemcpyAsync<TensorElType1>(cbuf_tmp, cbuf_dev_ptr, csize, gpuMemcpyDeviceToHost,
thandle);
}
// cbuf+=cbuf_tmp
gpuStreamSynchronize(thandle);
blas::axpy(csize, TensorElType1{1}, cbuf_tmp, 1, cbuf.data(), 1);
Expand Down Expand Up @@ -1119,10 +1127,7 @@ cd_ccsd_os_driver(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpace
std::string profile_csv = ccsd_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl;
std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;";
header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;";
header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg";
pds << header << std::endl;
pds << ec.get_profile_header() << std::endl;
pds << ec.get_profile_data().str() << std::endl;
pds.close();
}
Expand Down
17 changes: 8 additions & 9 deletions exachem/cc/cd_svd/cd_svd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,11 @@ exachem::cd_svd::cd_svd(ChemEnv& chem_env, ExecutionContext& ec, TiledIndexSpace
using libint2::Operator;
using libint2::Shell;

SystemData& sys_data = chem_env.sys_data;
const auto cd_options = chem_env.ioptions.cd_options;
const bool write_cv = cd_options.write_cv;
const double diagtol = cd_options.diagtol;
const int write_vcount = cd_options.write_vcount;
const tamm::Tile itile_size = cd_options.itilesize;
SystemData& sys_data = chem_env.sys_data;
const auto cd_options = chem_env.ioptions.cd_options;
const auto write_cv = cd_options.write_cv;
const double diagtol = cd_options.diagtol;
const tamm::Tile itile_size = cd_options.itilesize;
// const TAMM_GA_SIZE northo = sys_data.nbf;
const TAMM_GA_SIZE nao = sys_data.nbf_orig;

Expand Down Expand Up @@ -405,7 +404,7 @@ exachem::cd_svd::cd_svd(ChemEnv& chem_env, ExecutionContext& ec, TiledIndexSpace
Engine engine(Operator::coulomb, max_nprim(shells), max_l(shells), 0);
const auto& buf = engine.results();

bool cd_restart = write_cv && fs::exists(diag_ao_file) && fs::exists(chol_ao_file) &&
bool cd_restart = write_cv.first && fs::exists(diag_ao_file) && fs::exists(chol_ao_file) &&
fs::exists(cv_count_file);

auto compute_diagonals = [&](const IndexVector& blockid) {
Expand Down Expand Up @@ -830,15 +829,15 @@ exachem::cd_svd::cd_svd(ChemEnv& chem_env, ExecutionContext& ec, TiledIndexSpace

#if !defined(USE_UPCXX)
// Restart
if(write_cv && count % write_vcount == 0 && nbf > 1000) { write_chol_vectors(); }
if(write_cv.first && count % write_cv.second == 0 && nbf > 1000) { write_chol_vectors(); }
#endif

} // while

if(rank == 0) std::cout << endl << "- Total number of cholesky vectors = " << count << std::endl;

#if !defined(USE_UPCXX)
if(write_cv && nbf > 1000) write_chol_vectors();
if(write_cv.first && nbf > 1000) write_chol_vectors();
#endif

Tensor<TensorType>::deallocate(g_d_tamm, g_r_tamm);
Expand Down
5 changes: 1 addition & 4 deletions exachem/cc/rteom/rt_eom_cd_ccsd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,10 +956,7 @@ void rt_eom_cd_ccsd(ChemEnv& chem_env, ExecutionContext& ec, const TiledIndexSpa
std::string profile_csv = rt_eom_fp + "_profile.csv";
std::ofstream pds(profile_csv, std::ios::out);
if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl;
std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;";
header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;";
header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg";
pds << header << std::endl;
pds << ec.get_profile_header() << std::endl;
pds << ec.get_profile_data().str() << std::endl;
pds.close();
}
Expand Down
Loading

0 comments on commit e65360c

Please sign in to comment.