diff --git a/docs/user_guide/scf.rst b/docs/user_guide/scf.rst index a77e226..d944726 100644 --- a/docs/user_guide/scf.rst +++ b/docs/user_guide/scf.rst @@ -33,13 +33,30 @@ The values listed below are the defaults where few options are automatically adj "damp": 100, "nnodes": 1, "writem": 1, + "debug": false, "restart": false, "noscf": false, "scf_type": "restricted", - "xc_type": [], - "xc_grid_type": "UltraFine", - "direct_df": false, - "debug": false, + "direct_df": false, + "DFT": { + "snK": false, + "xc_type": [], + "xc_grid_type": "UltraFine", + "xc_pruning_scheme": "Robust", + "xc_rad_quad": "MK", + "xc_weight_scheme": "SSF", + "xc_exec_space": "Host", + "xc_basis_tol": 1e-10, + "xc_batch_size": 512, + "xc_snK_etol": 1e-10, + "xc_snK_ktol": 1e-10, + "xc_lb_kernel": "Default", + "xc_mw_kernel": "Default", + "xc_int_kernel": "Default", + "xc_red_kernel": "Default", + "xc_lwd_kernel": "Default", + "xc_radang_size": [0, 0] + }, "guess": { "atom_options":{ "Fe1": [1,2], @@ -93,26 +110,90 @@ The values listed below are the defaults where few options are automatically adj :noscf: ``[default=false]`` Computes only the SCF energy upon restart. +:debug: ``[default=false]`` enable verbose printing for debugging a calculation. + :scf_type: ``[default=restricted]`` The following values are supported * :strong:`restricted`: for closed-shell restricted Hartree-Fock (RHF) calculation * :strong:`unrestricted`: for spin-unrestricted Hartree-Fock (UHF) calculation +:direct_df: ``[default=false]`` Requests the direct computation of the density-fitted Coulomb contribution. Works only for pure Kohn-Sham fnctionals (no exact exchange) and with a provided ``df_basisset`` (see :ref:`Basis set options `). + +:snK: ``[default=false]`` Computes the exact exchange contribution using the seminumerical approach implemented in `GauXC`. + :xc_type: ``[default=[]]`` A list of strings specifying the exchange and correlation functionals for DFT calculations using `GauXC `_. The list of available functionals using the `builtin` backend can be found at the `ExchCXX `_ repository. - The `Libxc` backend is restricted to the list of LDA and GGA functionals without range separation available at `Libxc `_. + The `Libxc` backend is restricted to the list of functionals **without** range separation available at `Libxc `_. :xc_grid_type: ``[default=UltraFine]`` Specifies the quality of the numerical integration grid. The following values are supported * :strong:`Fine`: 75 radial shells with 302 angular points per shell. * :strong:`UltraFine`: 99 radial shells with 590 angular points per shell. * :strong:`SuperFine`: 175 radial shells with 974 angular points per shell for first row elements and 250 radial shells with 974 Lebedev points per shell for the rest. + * :strong:`GM3` + * :strong:`GM5` - All **xc_grid_type** options use a `Mura-Knowles` radial quadrature, a `Lebedev-Laikov` angular quadrature, a `Laqua-Kussmann-Ochsenfeld` partitioning scheme, and a `Robust` pruning method. +:xc_pruning_scheme: ``[default=Robust]`` Specifies the `GauXC` pruning scheme. The following values are supported -:direct_df: ``[default=false]`` Requests the direct computation of the density-fitted Coulomb contribution. Works only for pure Kohn-Sham fnctionals (no exact exchange) and with a provided ``df_basisset`` (see :ref:`Basis set options `). + * :strong:`Treutler` + * :strong:`Robust` + * :strong:`Unpruned` -:debug: ``[default=false]`` enable verbose printing for debugging a calculation. +:xc_rad_quad: ``[default=MK]`` Specifies the `GauXC` radial quadrature. The following values are supported + + * :strong:`MK` Mura-Knowles radial quadrature. + * :strong:`TA` Treutler-Ahlrichs radial quadrature. + * :strong:`MHL` Murray-Handy-Laming radial quadrature. + +:xc_weight_scheme: ``[default=SSF]`` Specifies the `GauXC` partitioning scheme. The following values are supported + + * :strong:`SSF` Stratman-Scuseria-Frisch partitioning scheme. + * :strong:`Becke` Becke partitioning scheme. + * :strong:`LKO` Laqua-Kussmann-Ochsenfeld partitioning scheme. + +:xc_exec_space: ``[default=Host]`` Specifies the `GauXC` execution space for the load balancer *and* integrator. The following values are supported + + * :strong:`Host` Use the CPU execution space. + * :strong:`Device` Use the GPU execution space. Only meaningful when GPU support was enabled during compilation. By default, `TAMM` reserves up to 80% of the GPU memory and only 10% is made available to `GauXC`. The `TAMM_GPU_POOL` environment variable can be used to modify the percentage of GPU memory reserved for `TAMM` and `GauXC` (`90-TAMM_GPU_POOL`). + +:xc_basis_tol: ``[default=1e-8]`` Specifies the `GauXC` basis tolerance. + +:xc_batch_size: ``[default=2048]`` Specifies the `GauXC` batch size. + +:xc_snK_etol: ``[default=1e-10]`` Specifies the `GauXC` snK energy tolerance. If `conve < xc_snK_etol`, the `xc_snK_etol` tolerance will be automatically set to the `conve` value. + +:xc_snK_ktol: ``[default=1e-10]`` Specifies the `GauXC` K matrix tolerance. If `conve * 1e-2 < xc_snK_ktol`, the `xc_snK_ktol` tolerance will be automatically set to `conve * 1e-2`. + +:xc_int_kernel: ``[default=Default]`` Specifies the `GauXC` Integrator Kernel. + + * :strong:`Default` Uses `Replicated` or `Incore` for `Host` and `Device` execution spaces, respectively. + * :strong:`Replicated` Only available for the `Host` execution space. + * :strong:`Incore` Only available for the `Device` execution space. + * :strong:`ShellBatched` Only available for the `Device` execution space. + +:xc_lwd_kernel: ``[default=Default]`` Specifies the `GauXC` Local Work Driver Kernel. + + * :strong:`Default` Uses the `Reference` or `Scheme1` kernels for `Host` and `Device` execution spaces, respectively. + * :strong:`Reference` Only available for the `Host` execution space. + * :strong:`Scheme1` Only available for the `Device` execution space. + * :strong:`Scheme1-Cutlass` Only available for the `Device` execution space. `GauXC` must be compiled setting `GAUXC_ENABLE_CUTLASS=ON`. + +:xc_red_kernel: ``[default=Default]`` Specifies the `GauXC` Reduction Kernel. + + * :strong:`Default` Uses the `BasicMPI` reduction kernel. + * :strong:`BasicMPI` + * :strong:`NCCL` Only available for the `Device` execution space. `GauXC` must be compiled setting `GAUXC_ENABLE_NCCL=ON`. + +:xc_lb_kernel: ``[default=Default]`` Specifies the `GauXC` Load Balancer Kernel. + + * :strong:`Default` Uses the `Replicated` reduction kernel. + * :strong:`Replicated` Alias for `Replicated-Petite` in the `Host` execution space. + * :strong:`Replicated-Petite` Only available for the `Host` execution space. + * :strong:`Replicated-Fillin` Only available for the `Host` execution space. + +:xc_mw_kernel: ``[default=Default]`` Specifies the `GauXC` Molecular Weights Kernel. + + * :strong:`Default` :nnodes: On a distributed machine, the number of processors for an SCF run is chosen by default depending on the problem size (i.e. number of basis functions **Nbf**). If a larger number of processors than required are used, the SCF module automatically chooses a smaller subset of processors for the calculation. diff --git a/exachem/common/chemenv.cpp b/exachem/common/chemenv.cpp index 5098fd5..7c96188 100644 --- a/exachem/common/chemenv.cpp +++ b/exachem/common/chemenv.cpp @@ -80,6 +80,8 @@ void ChemEnv::write_json_data(const std::string cmodule) { results["input"]["SCF"]["polvecs"] = scf.qed_polvecs; results["input"]["SCF"]["omegas"] = scf.qed_omegas; results["input"]["SCF"]["volumes"] = scf.qed_volumes; + results["input"]["SCF"]["xc_type"] = scf.xc_type; + results["input"]["SCF"]["snK"] = scf.snK; if(cmodule == "CD" || cmodule == "CCSD") { // CD options diff --git a/exachem/common/initialize_system_data.cpp b/exachem/common/initialize_system_data.cpp index bc393c5..2295768 100644 --- a/exachem/common/initialize_system_data.cpp +++ b/exachem/common/initialize_system_data.cpp @@ -8,6 +8,7 @@ void IniSystemData::initialize(ChemEnv& chem_env) { chem_env.sys_data.is_ks = false; chem_env.sys_data.is_qed = false; chem_env.sys_data.do_qed = false; + chem_env.sys_data.do_snK = false; chem_env.sys_data.freeze_atomic = false; SCFOptions& scf_options = chem_env.ioptions.scf_options; @@ -28,6 +29,8 @@ void IniSystemData::initialize(ChemEnv& chem_env) { else tamm_terminate("ERROR: unrecognized scf_type [" + scf_options.scf_type + "] provided"); if(!scf_options.xc_type.empty()) { chem_env.sys_data.is_ks = true; } + if(scf_options.snK) { chem_env.sys_data.do_snK = true; } + if(!scf_options.qed_volumes.empty()) { for(auto x: scf_options.qed_volumes) { scf_options.qed_lambdas.push_back(sqrt(1.0 / x)); } } diff --git a/exachem/common/options/input_options.cpp b/exachem/common/options/input_options.cpp index 92a23a2..1ae56f1 100644 --- a/exachem/common/options/input_options.cpp +++ b/exachem/common/options/input_options.cpp @@ -4,47 +4,47 @@ void SCFOptions::print() { std::cout << std::defaultfloat; std::cout << std::endl << "SCF Options" << std::endl; std::cout << "{" << std::endl; - std::cout << " charge = " << charge << std::endl; - std::cout << " multiplicity = " << multiplicity << std::endl; - std::cout << " level shift = " << lshift << std::endl; - std::cout << " tol_int = " << tol_int << std::endl; - std::cout << " tol_sch = " << tol_sch << std::endl; - std::cout << " tol_lindep = " << tol_lindep << std::endl; - std::cout << " conve = " << conve << std::endl; - std::cout << " convd = " << convd << std::endl; - std::cout << " diis_hist = " << diis_hist << std::endl; - std::cout << " AO_tilesize = " << AO_tilesize << std::endl; - std::cout << " writem = " << writem << std::endl; - std::cout << " damp = " << damp << std::endl; - std::cout << " n_lindep = " << n_lindep << std::endl; + std::cout << " charge = " << charge << std::endl; + std::cout << " multiplicity = " << multiplicity << std::endl; + std::cout << " level shift = " << lshift << std::endl; + std::cout << " tol_int = " << tol_int << std::endl; + std::cout << " tol_sch = " << tol_sch << std::endl; + std::cout << " tol_lindep = " << tol_lindep << std::endl; + std::cout << " conve = " << conve << std::endl; + std::cout << " convd = " << convd << std::endl; + std::cout << " diis_hist = " << diis_hist << std::endl; + std::cout << " AO_tilesize = " << AO_tilesize << std::endl; + std::cout << " writem = " << writem << std::endl; + std::cout << " damp = " << damp << std::endl; + std::cout << " n_lindep = " << n_lindep << std::endl; if(!moldenfile.empty()) { - std::cout << " moldenfile = " << moldenfile << std::endl; + std::cout << " moldenfile = " << moldenfile << std::endl; // std::cout << " n_lindep = " << n_lindep << std::endl; } - std::cout << " scf_type = " << scf_type << std::endl; + std::cout << " scf_type = " << scf_type << std::endl; // QED if(!qed_omegas.empty()) { - std::cout << " qed_omegas = ["; + std::cout << " qed_omegas = ["; for(auto x: qed_omegas) { std::cout << x << ","; } std::cout << "\b]" << std::endl; } if(!qed_lambdas.empty()) { - std::cout << " qed_lambdas = ["; + std::cout << " qed_lambdas = ["; for(auto x: qed_lambdas) { std::cout << x << ","; } std::cout << "\b]" << std::endl; } if(!qed_volumes.empty()) { - std::cout << " qed_volumes = ["; + std::cout << " qed_volumes = ["; for(auto x: qed_volumes) { std::cout << x << ","; } std::cout << "\b]" << std::endl; } if(!qed_polvecs.empty()) { - std::cout << " qed_polvecs = ["; + std::cout << " qed_polvecs = ["; for(auto x: qed_polvecs) { std::cout << "["; for(auto y: x) { std::cout << y << ","; } @@ -53,22 +53,46 @@ void SCFOptions::print() { std::cout << "\b]" << std::endl; } - if(!xc_type.empty()) { - std::cout << " xc_type = [ "; + txt_utils::print_bool(" direct_df ", direct_df); + + if(!xc_type.empty() || snK) { + std::cout << " DFT " << std::endl << " {" << std::endl; + std::cout << " xc_type = [ "; for(auto xcfunc: xc_type) { std::cout << " \"" << xcfunc << "\","; } std::cout << "\b ]" << std::endl; - std::cout << " xc_grid_type = " << xc_grid_type << std::endl; + + std::cout << " xc_grid_type = " << xc_grid_type << std::endl; + std::cout << " xc_pruning_scheme = " << xc_pruning_scheme << std::endl; + std::cout << " xc_weight_scheme = " << xc_weight_scheme << std::endl; + std::cout << " xc_exec_space = " << xc_exec_space << std::endl; + std::cout << " xc_basis_tol = " << xc_basis_tol << std::endl; + std::cout << " xc_batch_size = " << xc_batch_size << std::endl; + std::cout << " xc_lb_kernel = " << xc_lb_kernel << std::endl; + std::cout << " xc_mw_kernel = " << xc_mw_kernel << std::endl; + std::cout << " xc_int_kernel = " << xc_int_kernel << std::endl; + std::cout << " xc_red_kernel = " << xc_red_kernel << std::endl; + std::cout << " xc_lwd_kernel = " << xc_lwd_kernel << std::endl; + if(xc_radang_size.first > 0 && xc_radang_size.second > 0) { + std::cout << " xc_radang_size = " << xc_radang_size.first << ", " << xc_radang_size.second + << std::endl; + } + else { std::cout << " xc_rad_quad = " << xc_rad_quad << std::endl; } + + txt_utils::print_bool(" snK ", snK); + std::cout << " xc_snK_etol = " << xc_snK_etol << std::endl; + std::cout << " xc_snK_ktol = " << xc_snK_ktol << std::endl; + std::cout << " }" << std::endl; } if(scalapack_np_row > 0 && scalapack_np_col > 0) { - std::cout << " scalapack_np_row = " << scalapack_np_row << std::endl; - std::cout << " scalapack_np_col = " << scalapack_np_col << std::endl; - if(scalapack_nb > 1) std::cout << " scalapack_nb = " << scalapack_nb << std::endl; + std::cout << " scalapack_np_row = " << scalapack_np_row << std::endl; + std::cout << " scalapack_np_col = " << scalapack_np_col << std::endl; + if(scalapack_nb > 1) std::cout << " scalapack_nb = " << scalapack_nb << std::endl; } - std::cout << " restart_size = " << restart_size << std::endl; - txt_utils::print_bool(" restart ", restart); - txt_utils::print_bool(" debug ", debug); - if(restart) txt_utils::print_bool(" noscf ", noscf); + std::cout << " restart_size = " << restart_size << std::endl; + txt_utils::print_bool(" restart ", restart); + txt_utils::print_bool(" debug ", debug); + if(restart) txt_utils::print_bool(" noscf ", noscf); // txt_utils::print_bool(" sad ", sad); if(mulliken_analysis || mos_txt || mo_vectors_analysis.first) { std::cout << " PRINT {" << std::endl; diff --git a/exachem/common/options/input_options.hpp b/exachem/common/options/input_options.hpp index 91af26e..45f290c 100644 --- a/exachem/common/options/input_options.hpp +++ b/exachem/common/options/input_options.hpp @@ -40,17 +40,32 @@ class SCFOptions: public CommonOptions { bool sad{true}; bool force_tilesize{false}; bool direct_df{false}; + bool snK{false}; int restart_size{2000}; // read/write orthogonalizer, schwarz, etc matrices when N>=restart_size int scalapack_nb{256}; int nnodes{1}; int scalapack_np_row{0}; int scalapack_np_col{0}; - std::string moldenfile{""}; - int n_lindep{0}; - int writem{1}; - int damp{100}; // density mixing parameter - std::string scf_type{"restricted"}; - std::string xc_grid_type{"UltraFine"}; + std::string moldenfile{""}; + int n_lindep{0}; + int writem{1}; + int damp{100}; // density mixing parameter + std::string scf_type{"restricted"}; + std::string xc_grid_type{"UltraFine"}; + std::string xc_pruning_scheme{"Robust"}; + std::string xc_rad_quad{"MK"}; + std::string xc_weight_scheme{"SSF"}; + std::string xc_exec_space{"Host"}; + std::string xc_lb_kernel{"Default"}; + std::string xc_mw_kernel{"Default"}; + std::string xc_int_kernel{"Default"}; + std::string xc_red_kernel{"Default"}; + std::string xc_lwd_kernel{"Default"}; + std::pair xc_radang_size{0, 0}; + int xc_batch_size{2048}; + double xc_basis_tol{1e-8}; + double xc_snK_etol{1e-10}; + double xc_snK_ktol{1e-10}; std::map> guess_atom_options; diff --git a/exachem/common/options/parse_scf_options.cpp b/exachem/common/options/parse_scf_options.cpp index 9468acb..f8eb918 100644 --- a/exachem/common/options/parse_scf_options.cpp +++ b/exachem/common/options/parse_scf_options.cpp @@ -15,20 +15,31 @@ void ParseSCFOptions::parse_check(json& jinput) { const std::vector valid_scf{"charge", "multiplicity", "lshift", "tol_int", "tol_sch", "tol_lindep", "conve", "convd", "diis_hist","force_tilesize","tilesize","df_tilesize", "damp","writem","nnodes","restart","noscf","moldenfile", "guess", - "debug","scf_type","xc_type", "xc_grid_type", "n_lindep","restart_size","scalapack_nb", - "scalapack_np_row","scalapack_np_col","ext_data_path","PRINT", - "qed_omegas","qed_lambdas","qed_volumes","qed_polvecs","direct_df","comments"}; + "debug","scf_type", "n_lindep","restart_size","scalapack_nb", + "scalapack_np_row", "scalapack_np_col", "ext_data_path", "PRINT", + "qed_omegas", "qed_lambdas", "qed_volumes", "qed_polvecs", + "direct_df", "DFT", "comments"}; + const std::vector valid_dft{"xc_pruning_scheme", "xc_rad_quad", "xc_batch_size", + "xc_snK_etol", "xc_snK_ktol", "xc_weight_scheme", "xc_exec_space", "snK", "xc_type", + "xc_lb_kernel", "xc_mw_kernel", "xc_int_kernel", "xc_red_kernel", "xc_lwd_kernel", + "xc_radang_size", "xc_basis_tol", "xc_grid_type"}; // clang-format on for(auto& el: jinput["SCF"].items()) { if(std::find(valid_scf.begin(), valid_scf.end(), el.key()) == valid_scf.end()) tamm_terminate("INPUT FILE ERROR: Invalid SCF option [" + el.key() + "] in the input file"); } + for(auto& el: jinput["SCF"]["DFT"].items()) { + if(std::find(valid_dft.begin(), valid_dft.end(), el.key()) == valid_dft.end()) + tamm_terminate("INPUT FILE ERROR: Invalid DFT option in SCF block [" + el.key() + "]"); + } } void ParseSCFOptions::parse(ChemEnv& chem_env) { json jscf = chem_env.jinput["SCF"]; SCFOptions& scf_options = chem_env.ioptions.scf_options; + update_common_options(chem_env); + parse_option(scf_options.charge, jscf, "charge"); parse_option(scf_options.multiplicity, jscf, "multiplicity"); parse_option(scf_options.lshift, jscf, "lshift"); @@ -49,8 +60,27 @@ void ParseSCFOptions::parse(ChemEnv& chem_env) { parse_option(scf_options.debug, jscf, "debug"); parse_option(scf_options.moldenfile, jscf, "moldenfile"); parse_option(scf_options.scf_type, jscf, "scf_type"); - parse_option(scf_options.xc_grid_type, jscf, "xc_grid_type"); - parse_option>(scf_options.xc_type, jscf, "xc_type"); + parse_option(scf_options.direct_df, jscf, "direct_df"); + + json jdft = jscf["DFT"]; + parse_option(scf_options.snK, jdft, "snK"); + parse_option(scf_options.xc_grid_type, jdft, "xc_grid_type"); + parse_option(scf_options.xc_pruning_scheme, jdft, "xc_pruning_scheme"); + parse_option(scf_options.xc_rad_quad, jdft, "xc_rad_quad"); + parse_option(scf_options.xc_weight_scheme, jdft, "xc_weight_scheme"); + parse_option(scf_options.xc_exec_space, jdft, "xc_exec_space"); + parse_option(scf_options.xc_lb_kernel, jdft, "xc_lb_kernel"); + parse_option(scf_options.xc_mw_kernel, jdft, "xc_mw_kernel"); + parse_option(scf_options.xc_int_kernel, jdft, "xc_int_kernel"); + parse_option(scf_options.xc_red_kernel, jdft, "xc_red_kernel"); + parse_option(scf_options.xc_lwd_kernel, jdft, "xc_lwd_kernel"); + parse_option>(scf_options.xc_type, jdft, "xc_type"); + parse_option>(scf_options.xc_radang_size, jdft, "xc_radang_size"); + parse_option(scf_options.xc_batch_size, jdft, "xc_batch_size"); + parse_option(scf_options.xc_basis_tol, jdft, "xc_basis_tol"); + parse_option(scf_options.xc_snK_etol, jdft, "xc_snK_etol"); + parse_option(scf_options.xc_snK_ktol, jdft, "xc_snK_ktol"); + parse_option(scf_options.n_lindep, jscf, "n_lindep"); parse_option(scf_options.restart_size, jscf, "restart_size"); parse_option(scf_options.scalapack_nb, jscf, "scalapack_nb"); @@ -61,7 +91,6 @@ void ParseSCFOptions::parse(ChemEnv& chem_env) { parse_option>(scf_options.qed_lambdas, jscf, "qed_lambdas"); parse_option>(scf_options.qed_volumes, jscf, "qed_volumes"); parse_option>>(scf_options.qed_polvecs, jscf, "qed_polvecs"); - parse_option(scf_options.direct_df, jscf, "direct_df"); json jscf_guess = jscf["guess"]; json jguess_atom_options = jscf_guess["atom_options"]; @@ -86,11 +115,11 @@ void ParseSCFOptions::parse(ChemEnv& chem_env) { xc_grid_str.end()); scf_options.xc_grid_type = xc_grid_str; std::transform(xc_grid_str.begin(), xc_grid_str.end(), xc_grid_str.begin(), ::tolower); - if(xc_grid_str != "fine" && xc_grid_str != "ultrafine" && xc_grid_str != "superfine") + if(xc_grid_str != "fine" && xc_grid_str != "ultrafine" && xc_grid_str != "superfine" && + xc_grid_str != "gm3" && xc_grid_str != "gm5") tamm_terminate("INPUT FILE ERROR: SCF option xc_grid_type should be one of [Fine, " - "UltraFine, SuperFine]"); + "UltraFine, SuperFine, GM3, GM5]"); } - update_common_options(chem_env); } void ParseSCFOptions::update_common_options(ChemEnv& chem_env) { diff --git a/exachem/common/system_data.cpp b/exachem/common/system_data.cpp index 384ad11..1e98f19 100644 --- a/exachem/common/system_data.cpp +++ b/exachem/common/system_data.cpp @@ -6,6 +6,7 @@ void SystemData::print() { if(is_unrestricted) std::cout << "Open-Shell SCF" << std::endl; if(is_restricted_os) std::cout << "Restricted Open-Shell SCF" << std::endl; if(is_ks) std::cout << "KS-DFT Enabled" << std::endl; + if(do_snK) std::cout << "snK Enabled" << std::endl; if(is_qed && do_qed && is_ks) { std::cout << "QED-KS Enabled" << std::endl; } else if(is_qed && is_ks) { std::cout << "QEDFT Enabled" << std::endl; } else if(is_qed) { std::cout << "QED-HF Enabled" << std::endl; } @@ -55,6 +56,7 @@ SystemData::SystemData(ECOptions options_map_, const std::string scf_type_string is_ks = false; is_qed = false; do_qed = false; + do_snK = false; freeze_atomic = false; if(scf_type_string == "restricted") { focc = 1; @@ -71,6 +73,8 @@ SystemData::SystemData(ECOptions options_map_, const std::string scf_type_string else tamm_terminate("ERROR: unrecognized scf_type [" + scf_type_string + "] provided"); if(!options_map_.scf_options.xc_type.empty()) { is_ks = true; } + if(options_map_.scf_options.snK) { do_snK = true; } + if(!options_map_.scf_options.qed_volumes.empty()) { for(auto x: options_map_.scf_options.qed_volumes) { options_map_.scf_options.qed_lambdas.push_back(sqrt(1.0 / x)); diff --git a/exachem/common/system_data.hpp b/exachem/common/system_data.hpp index 295f825..1d8cdc6 100644 --- a/exachem/common/system_data.hpp +++ b/exachem/common/system_data.hpp @@ -48,6 +48,7 @@ class SystemData { bool is_ks{}; bool is_qed{}; bool do_qed{}; + bool do_snK{}; bool has_ecp{}; // bool is_cas{};??? diff --git a/exachem/scf/scf_compute.cpp b/exachem/scf/scf_compute.cpp index 6d58942..2193c63 100644 --- a/exachem/scf/scf_compute.cpp +++ b/exachem/scf/scf_compute.cpp @@ -14,6 +14,211 @@ void exachem::scf::SCFCompute::compute_shellpair_list(const ExecutionContext& e << shells.size() * (shells.size() + 1) / 2 << "," << nsp << "}" << endl; } +void exachem::scf::SCFCompute::compute_trafo(const libint2::BasisSet& shells, + EigenTensors& etensors) { + std::vector& CtoS = etensors.trafo_ctos; + std::vector& StoC = etensors.trafo_stoc; + int lmax = shells.max_l(); + + static constexpr std::array fac = {{1LL, + 1LL, + 2LL, + 6LL, + 24LL, + 120LL, + 720LL, + 5040LL, + 40320LL, + 362880LL, + 3628800LL, + 39916800LL, + 479001600LL, + 6227020800LL, + 87178291200LL, + 1307674368000LL, + 20922789888000LL, + 355687428096000LL, + 6402373705728000LL, + 121645100408832000LL, + 2432902008176640000LL}}; + + auto binomial = [](int i, int j) -> int { + return (j < 0 || j > i) ? 0 : fac[i] / (fac[j] * fac[i - j]); + }; + + for(int l = 0; l <= lmax; l++) { + int c_size = (l + 1) * (l + 2) / 2; + int s_size = 2 * l + 1; + Matrix trafo = Matrix::Zero(s_size, c_size); + double norm2 = 1.0 / std::sqrt(((double) fac[2 * l]) / (pow(2.0, 2 * l) * fac[l])); + + for(int lx = l, ic = 0; lx >= 0; lx--) { + for(int ly = l - lx; ly >= 0; ly--, ic++) { + int lz = l - lx - ly; + double norm1 = 1.0 / std::sqrt(((double) fac[2 * lx] * fac[2 * ly] * fac[2 * lz]) / + (pow(2.0, 2 * l) * fac[lx] * fac[ly] * fac[lz])); + double factor = norm1 / norm2; + + for(int m = -l, is = 0; m <= l; m++, is++) { + int ma = abs(m); + int j = lx + ly - ma; + if(j < 0 || j % 2 == 1) continue; + j = j / 2; + double s1{0.0}; + + for(int i = 0; i <= (l - ma) / 2; i++) { + double s2{0.0}; + for(int k = 0; k <= j; k++) { + double s{0.0}; + int expo; + if((m < 0 && abs(ma - lx) % 2 == 1) || (m > 0 && abs(ma - lx) % 2 == 0)) { + expo = (ma - lx + 2 * k) / 2; + s = pow(-1.0, expo) * std::sqrt(2.0); + } + else if(m == 0 && lx % 2 == 0) { + expo = k - lx / 2; + s = pow(-1.0, expo); + } + else { s = 0.0; } + s2 += binomial(j, k) * binomial(ma, lx - 2 * k) * s; + } + s1 += binomial(l, i) * binomial(i, j) * fac[2 * l - 2 * i] * pow(-1.0, i) * s2 / + fac[l - ma - 2 * i]; + } + trafo(is, ic) = + factor * s1 / (fac[l] * pow(2.0, l)) * + std::sqrt(((double) fac[2 * lx] * fac[2 * ly] * fac[2 * lz] * fac[l] * fac[l - ma]) / + (fac[lx] * fac[ly] * fac[lz] * fac[2 * l] * fac[l + ma])); + } + } + } + CtoS.push_back(trafo); + } + + // Compute backtransformation + for(int l = 0; l <= lmax; l++) { + int c_size = (l + 1) * (l + 2) / 2; + int s_size = 2 * l + 1; + Matrix trafo = Matrix::Zero(s_size, c_size); + double norm2 = 1.0 / std::sqrt(fac[2 * l] / (pow(2.0, 2 * l) * fac[l])); + + for(int lx1 = l, ic1 = 0; lx1 >= 0; lx1--) { + for(int ly1 = l - lx1; ly1 >= 0; ly1--, ic1++) { + int lz1 = l - lx1 - ly1; + double s1 = std::sqrt(((double) fac[lx1] * fac[ly1] * fac[lz1]) / + (fac[2 * lx1] * fac[2 * ly1] * fac[2 * lz1])); + double norm11 = s1 * pow(2, l); + for(int lx2 = l, ic2 = 0; lx2 >= 0; lx2--) { + for(int ly2 = l - lx2; ly2 >= 0; ly2--, ic2++) { + int lz2 = l - lx2 - ly2; + int lx = lx1 + lx2; + int ly = ly1 + ly2; + int lz = lz1 + lz2; + if(lx % 2 == 1 || ly % 2 == 1 || lz % 2 == 1) continue; + double s2 = std::sqrt(((double) fac[lx2] * fac[ly2] * fac[lz2]) / + (fac[2 * lx2] * fac[2 * ly2] * fac[2 * lz2])); + double norm12 = s2 * pow(2, l); + double s = fac[lx] * fac[ly] * fac[lz] * s1 * s2 / + (fac[lx / 2] * fac[ly / 2] * fac[lz / 2]) * norm2 / norm11 * norm2 / norm12; + for(int is = 0; is <= 2 * l; is++) trafo(is, ic1) += s * CtoS[l](is, ic2); + } + } + } + } + StoC.push_back(trafo); + } +} + +template +void exachem::scf::SCFCompute::compute_sdens_to_cdens(const libint2::BasisSet& shells, + Matrix& Spherical, Matrix& Cartesian, + EigenTensors& etensors) { + std::vector& CtoS = etensors.trafo_ctos; + auto shell2bf = shells.shell2bf(); + int nsh = shells.size(); + int Nspher{0}; + int Ncart{0}; + for(auto& shell: shells) { + int l = shell.contr[0].l; + Nspher += 2 * l + 1; + Ncart += ((l + 1) * (l + 2)) / 2; + } + + Cartesian = Matrix::Zero(Ncart, Ncart); + int bf1_cartesian = 0; + for(int sh1 = 0; sh1 < nsh; sh1++) { + int l1 = shells[sh1].contr[0].l; + int bf1_spherical = shell2bf[sh1]; + int n1_spherical = shells[sh1].size(); + int n1_cartesian = ((l1 + 1) * (l1 + 2)) / 2; + int bf2_cartesian = 0; + for(int sh2 = 0; sh2 < nsh; sh2++) { + int l2 = shells[sh2].contr[0].l; + int bf2_spherical = shell2bf[sh2]; + int n2_spherical = shells[sh2].size(); + int n2_cartesian = ((l2 + 1) * (l2 + 2)) / 2; + for(int is1 = 0; is1 < n1_spherical; is1++) { + for(int is2 = 0; is2 < n2_spherical; is2++) { + for(int ic1 = 0; ic1 < n1_cartesian; ic1++) { + for(int ic2 = 0; ic2 < n2_cartesian; ic2++) { + Cartesian(bf1_cartesian + ic1, bf2_cartesian + ic2) += + CtoS[l1](is1, ic1) * Spherical(bf1_spherical + is1, bf2_spherical + is2) * + CtoS[l2](is2, ic2); + } + } + } + } + bf2_cartesian += n2_cartesian; + } + bf1_cartesian += n1_cartesian; + } +} + +template +void exachem::scf::SCFCompute::compute_cpot_to_spot(const libint2::BasisSet& shells, + Matrix& Spherical, Matrix& Cartesian, + EigenTensors& etensors) { + std::vector& CtoS = etensors.trafo_ctos; + auto shell2bf = shells.shell2bf(); + int nsh = shells.size(); + int Nspher{0}; + int Ncart{0}; + for(auto& shell: shells) { + int l = shell.contr[0].l; + Nspher += 2 * l + 1; + Ncart += ((l + 1) * (l + 2)) / 2; + } + + Spherical = Matrix::Zero(Nspher, Nspher); + int bf1_cartesian = 0; + for(int sh1 = 0; sh1 < nsh; sh1++) { + int l1 = shells[sh1].contr[0].l; + int bf1_spherical = shell2bf[sh1]; + int n1_spherical = shells[sh1].size(); + int n1_cartesian = ((l1 + 1) * (l1 + 2)) / 2; + int bf2_cartesian = 0; + for(int sh2 = 0; sh2 < nsh; sh2++) { + int l2 = shells[sh2].contr[0].l; + int bf2_spherical = shell2bf[sh2]; + int n2_spherical = shells[sh2].size(); + int n2_cartesian = ((l2 + 1) * (l2 + 2)) / 2; + for(int is1 = 0; is1 < n1_spherical; is1++) { + for(int is2 = 0; is2 < n2_spherical; is2++) { + for(int ic1 = 0; ic1 < n1_cartesian; ic1++) { + for(int ic2 = 0; ic2 < n2_cartesian; ic2++) { + Spherical(bf1_spherical + is1, bf2_spherical + is2) += + CtoS[l1](is1, ic1) * Cartesian(bf1_cartesian + ic1, bf2_cartesian + ic2) * + CtoS[l2](is2, ic2); + } + } + } + } + bf2_cartesian += n2_cartesian; + } + bf1_cartesian += n1_cartesian; + } +} + std::tuple exachem::scf::SCFCompute::compute_NRE(const ExecutionContext& ec, std::vector& atoms) { // count the number of electrons @@ -203,7 +408,8 @@ void exachem::scf::SCFCompute::compute_density(ExecutionContext& ec, ChemEnv& ch sch.execute(); // compute D in eigen for subsequent fock build - if(!scf_vars.do_dens_fit || scf_vars.direct_df || chem_env.sys_data.is_ks) { + if(!scf_vars.do_dens_fit || scf_vars.direct_df || chem_env.sys_data.is_ks || + chem_env.sys_data.do_snK) { tamm_to_eigen_tensor(ttensors.D_alpha, D_alpha); if(is_uhf) tamm_to_eigen_tensor(ttensors.D_beta, etensors.D_beta); } @@ -403,6 +609,12 @@ template Matrix exachem::scf::SCFCompute::compute_schwarz_ints::oper_params_type params); +template void exachem::scf::SCFCompute::compute_sdens_to_cdens( + const libint2::BasisSet& shells, Matrix& Spherical, Matrix& Cartesian, EigenTensors& etensors); + +template void exachem::scf::SCFCompute::compute_cpot_to_spot( + const libint2::BasisSet& shells, Matrix& Spherical, Matrix& Cartesian, EigenTensors& etensors); + template void exachem::scf::SCFCompute::compute_hamiltonian( ExecutionContext& ec, const SCFVars& scf_vars, std::vector& atoms, libint2::BasisSet& shells, TAMMTensors& ttensors, EigenTensors& etensors); diff --git a/exachem/scf/scf_compute.hpp b/exachem/scf/scf_compute.hpp index 0b6c1b3..9d2992c 100644 --- a/exachem/scf/scf_compute.hpp +++ b/exachem/scf/scf_compute.hpp @@ -20,6 +20,7 @@ class SCFCompute { Matrix compute_shellblock_norm(const libint2::BasisSet& obs, const Matrix& A); void compute_shellpair_list(const ExecutionContext& ec, const libint2::BasisSet& shells, SCFVars& scf_vars); + void compute_trafo(const libint2::BasisSet& shells, EigenTensors& etensors); std::tuple compute_NRE(const ExecutionContext& ec, std::vector& atoms); std::tuple @@ -35,6 +36,14 @@ class SCFCompute { void recompute_tilesize(tamm::Tile& tile_size, const int N, const bool force_ts, const bool rank0); + template + void compute_sdens_to_cdens(const libint2::BasisSet& shells, Matrix& Spherical, Matrix& Cartesian, + EigenTensors& etensors); + + template + void compute_cpot_to_spot(const libint2::BasisSet& shells, Matrix& Spherical, Matrix& Cartesian, + EigenTensors& etensors); + template void compute_hamiltonian(ExecutionContext& ec, const SCFVars& scf_vars, std::vector& atoms, libint2::BasisSet& shells, diff --git a/exachem/scf/scf_eigen_tensors.hpp b/exachem/scf/scf_eigen_tensors.hpp index 9132e35..96aead8 100644 --- a/exachem/scf/scf_eigen_tensors.hpp +++ b/exachem/scf/scf_eigen_tensors.hpp @@ -8,7 +8,10 @@ class EigenTensors { Matrix VXC_alpha, VXC_beta; // allocated only on rank 0 when DFT is enabled Matrix G_alpha, D_alpha; // allocated on all ranks for 4c HF, only on rank 0 otherwise. Matrix G_beta, D_beta; // allocated on all ranks for 4c HF, only D_beta on rank 0 otherwise. + Matrix D_alpha_cart, D_beta_cart; + Matrix VXC_alpha_cart, VXC_beta_cart; Eigen::Vector dfNorm; // Normalization coefficients for DF basis + std::vector trafo_ctos, trafo_stoc; Eigen::Matrix taskmap; // on all ranks for 4c HF only }; diff --git a/exachem/scf/scf_gauxc.cpp b/exachem/scf/scf_gauxc.cpp index c6e0278..7c69f2f 100644 --- a/exachem/scf/scf_gauxc.cpp +++ b/exachem/scf/scf_gauxc.cpp @@ -5,7 +5,6 @@ std::tuple>, double> exachem::scf::gauxc::setup_gauxc(ExecutionContext& ec, const ChemEnv& chem_env, const SCFVars& scf_vars) { - size_t batch_size = 512; const SystemData& sys_data = chem_env.sys_data; const SCFOptions& scf_options = chem_env.ioptions.scf_options; @@ -21,44 +20,85 @@ exachem::scf::gauxc::setup_gauxc(ExecutionContext& ec, const ChemEnv& chem_env, auto gc1 = std::chrono::high_resolution_clock::now(); auto gauxc_mol = make_gauxc_molecule(atoms); - auto gauxc_basis = make_gauxc_basis(shells); - auto gauxc_rt = GauXC::RuntimeEnvironment(ec.pg().comm()); + auto gauxc_basis = make_gauxc_basis(shells, scf_options.xc_basis_tol); auto polar = is_rhf ? ExchCXX::Spin::Unpolarized : ExchCXX::Spin::Polarized; - auto grid_type = GauXC::AtomicGridSizeDefault::UltraFineGrid; - auto xc_grid_str = scf_options.xc_grid_type; + auto xc_radang_size = scf_options.xc_radang_size; + auto xc_grid_str = scf_options.xc_grid_type; std::transform(xc_grid_str.begin(), xc_grid_str.end(), xc_grid_str.begin(), ::tolower); - if(xc_grid_str == "fine") grid_type = GauXC::AtomicGridSizeDefault::FineGrid; - else if(xc_grid_str == "superfine") grid_type = GauXC::AtomicGridSizeDefault::SuperFineGrid; + std::map grid_map = { + {"fine", GauXC::AtomicGridSizeDefault::FineGrid}, + {"ultrafine", GauXC::AtomicGridSizeDefault::UltraFineGrid}, + {"superfine", GauXC::AtomicGridSizeDefault::SuperFineGrid}, + {"gm3", GauXC::AtomicGridSizeDefault::GM3}, + {"gm5", GauXC::AtomicGridSizeDefault::GM5}}; + const bool use_custom_grid = xc_radang_size.first > 0 && xc_radang_size.second > 0; // This options are set to get good accuracy. // [Unpruned, Robust, Treutler] - auto gauxc_molgrid = GauXC::MolGridFactory::create_default_molgrid( - gauxc_mol, GauXC::PruningScheme::Robust, GauXC::BatchSize(batch_size), - GauXC::RadialQuad::MuraKnowles, grid_type); + auto xc_pruning_str = scf_options.xc_pruning_scheme; + std::transform(xc_pruning_str.begin(), xc_pruning_str.end(), xc_pruning_str.begin(), ::tolower); + std::map pruning_map = { + {"robust", GauXC::PruningScheme::Robust}, + {"treutler", GauXC::PruningScheme::Treutler}, + {"unpruned", GauXC::PruningScheme::Unpruned}}; + + auto xc_radquad_str = scf_options.xc_rad_quad; + std::transform(xc_radquad_str.begin(), xc_radquad_str.end(), xc_radquad_str.begin(), ::tolower); + std::map radquad_map = { + {"mk", GauXC::RadialQuad::MuraKnowles}, + {"ta", GauXC::RadialQuad::TreutlerAldrichs}, + {"mhl", GauXC::RadialQuad::MurrayHandyLaming}}; + + auto gauxc_molgrid = + use_custom_grid + ? GauXC::MolGridFactory::create_default_molgrid( + gauxc_mol, pruning_map.at(xc_pruning_str), + GauXC::BatchSize((size_t) scf_options.xc_batch_size), radquad_map.at(xc_radquad_str), + GauXC::RadialSize(xc_radang_size.first), GauXC::AngularSize(xc_radang_size.second)) + : GauXC::MolGridFactory::create_default_molgrid( + gauxc_mol, pruning_map.at(xc_pruning_str), + GauXC::BatchSize((size_t) scf_options.xc_batch_size), radquad_map.at(xc_radquad_str), + grid_map.at(xc_grid_str)); + auto gauxc_molmeta = std::make_shared(gauxc_mol); - std::string lb_exec_space_str = "HOST"; - std::string int_exec_space_str = "HOST"; + auto xc_space_str = scf_options.xc_exec_space; + std::transform(xc_space_str.begin(), xc_space_str.end(), xc_space_str.begin(), ::tolower); #ifdef GAUXC_HAS_DEVICE - std::map exec_space_map = { - {"HOST", GauXC::ExecutionSpace::Host}, {"DEVICE", GauXC::ExecutionSpace::Device}}; + double gauxc_gpu_pool = 0.1; + if(const char* tamm_gpu_pool_char = std::getenv("TAMM_GPU_POOL")) { + int tamm_gpu_pool = std::atoi(tamm_gpu_pool_char); + gauxc_gpu_pool = 0.9 - tamm_gpu_pool / 100.0; + } - auto lb_exec_space = exec_space_map.at(lb_exec_space_str); - auto int_exec_space = exec_space_map.at(int_exec_space_str); + std::map exec_space_map = { + {"host", GauXC::ExecutionSpace::Host}, {"device", GauXC::ExecutionSpace::Device}}; + auto lb_exec_space = exec_space_map.at(xc_space_str); + auto int_exec_space = exec_space_map.at(xc_space_str); + auto gauxc_rt = txt_utils::strequal_case(xc_space_str, "device") + ? GauXC::DeviceRuntimeEnvironment(ec.pg().comm(), gauxc_gpu_pool) + : GauXC::RuntimeEnvironment(ec.pg().comm()); #else + auto gauxc_rt = GauXC::RuntimeEnvironment(ec.pg().comm()); auto lb_exec_space = GauXC::ExecutionSpace::Host; auto int_exec_space = GauXC::ExecutionSpace::Host; #endif // Set the load balancer - GauXC::LoadBalancerFactory lb_factory(lb_exec_space, "Replicated"); + GauXC::LoadBalancerFactory lb_factory(lb_exec_space, scf_options.xc_lb_kernel); auto gauxc_lb = lb_factory.get_shared_instance(gauxc_rt, gauxc_mol, gauxc_molgrid, gauxc_basis); // Modify the weighting algorithm from the input [Becke, SSF, LKO] - GauXC::MolecularWeightsSettings mw_settings = {GauXC::XCWeightAlg::LKO, false}; - GauXC::MolecularWeightsFactory mw_factory(int_exec_space, "Default", mw_settings); + auto xc_weight_str = scf_options.xc_weight_scheme; + std::transform(xc_weight_str.begin(), xc_weight_str.end(), xc_weight_str.begin(), ::tolower); + std::map weight_map = {{"ssf", GauXC::XCWeightAlg::SSF}, + {"becke", GauXC::XCWeightAlg::Becke}, + {"lko", GauXC::XCWeightAlg::LKO}}; + + GauXC::MolecularWeightsSettings mw_settings = {weight_map.at(xc_weight_str), false}; + GauXC::MolecularWeightsFactory mw_factory(int_exec_space, scf_options.xc_mw_kernel, mw_settings); auto mw = mw_factory.get_instance(); mw.modify_weights(*gauxc_lb); @@ -73,12 +113,51 @@ exachem::scf::gauxc::setup_gauxc(ExecutionContext& ec, const ChemEnv& chem_env, std::transform(xcfunc.begin(), xcfunc.end(), xcfunc.begin(), ::toupper); if(rank == 0) std::cout << "Functional: " << xcfunc << std::endl; - try { - // Try to setup using the builtin backend. + // First try few functionals defined in ExchCXX + if(txt_utils::strequal_case(xcfunc, "BLYP")) { + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("B88"), polar)); + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("LYP"), polar)); + } + else if(txt_utils::strequal_case(xcfunc, "PBE")) { + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("PBE_X"), polar)); + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("PBE_C"), polar)); + // SCAN and R2SCAN might not be implemented in current version, fallback to LibXC + } + else if(txt_utils::strequal_case(xcfunc, "SCAN")) { + if(ExchCXX::kernel_map.key_exists("SCAN_X")) { + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("SCAN_X"), polar)); + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("SCAN_C"), polar)); + } + else { + kernels.push_back(ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_SCAN"), polar)); + kernels.push_back(ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_SCAN"), polar)); + } + } + else if(txt_utils::strequal_case(xcfunc, "R2SCAN")) { + if(ExchCXX::kernel_map.key_exists("R2SCAN_X")) { + kernels.push_back(ExchCXX::XCKernel(ExchCXX::Backend::builtin, + ExchCXX::kernel_map.value("R2SCAN_X"), polar)); + kernels.push_back(ExchCXX::XCKernel(ExchCXX::Backend::builtin, + ExchCXX::kernel_map.value("R2SCAN_C"), polar)); + } + else { + kernels.push_back(ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_R2SCAN"), polar)); + kernels.push_back(ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_R2SCAN"), polar)); + } + // Try using the builtin backend + } + else if(ExchCXX::kernel_map.key_exists(xcfunc)) { kernels.push_back( ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value(xcfunc), polar)); - } catch(...) { - // If the above failed, setup with LibXC backend + // Fallback to LibXC + } + else { kernels.push_back(ExchCXX::XCKernel(ExchCXX::libxc_name_string(xcfunc), polar)); if(txt_utils::strequal_case(xcfunc, "HYB_GGA_X_QED") || txt_utils::strequal_case(xcfunc, "HYB_GGA_XC_QED") || @@ -93,15 +172,25 @@ exachem::scf::gauxc::setup_gauxc(ExecutionContext& ec, const ChemEnv& chem_env, // if(is_qed && kernel_id > -1) scf_qed.qed_functionals_setup(params, chem_env); // gauxc_integrator.set_ext_params( params ); + // Setup dummy functional for snK calculations and no XC functional + bool dummy_xc{false}; + if(scf_options.snK && kernels.size() < 1) { + kernels.push_back( + ExchCXX::XCKernel(ExchCXX::Backend::builtin, ExchCXX::kernel_map.value("PBE_X"), polar)); + dummy_xc = true; + } + GauXC::functional_type gauxc_func = GauXC::functional_type(kernels); // Initialize GauXC integrator - GauXC::XCIntegratorFactory integrator_factory(int_exec_space, "Replicated", "Default", - "Default", "Default"); - auto gc2 = std::chrono::high_resolution_clock::now(); + GauXC::XCIntegratorFactory integrator_factory( + int_exec_space, "Replicated", scf_options.xc_int_kernel, scf_options.xc_red_kernel, + scf_options.xc_lwd_kernel); + + auto gc2 = std::chrono::high_resolution_clock::now(); auto gc_time = std::chrono::duration_cast>((gc2 - gc1)).count(); - double xHF = gauxc_func.is_hyb() ? gauxc_func.hyb_exx() : 0.0; + double xHF = dummy_xc ? 1.0 : (gauxc_func.is_hyb() ? gauxc_func.hyb_exx() : 0.0); if(rank == 0) std::cout << std::fixed << std::setprecision(2) << "GauXC setup time: " << gc_time << "s\n"; @@ -118,7 +207,8 @@ GauXC::Molecule exachem::scf::gauxc::make_gauxc_molecule(const std::vector exachem::scf::gauxc::make_gauxc_basis(const libint2::BasisSet& basis) { +GauXC::BasisSet exachem::scf::gauxc::make_gauxc_basis(const libint2::BasisSet& basis, + const double basis_tol) { using shell_t = GauXC::Shell; using prim_t = typename shell_t::prim_array; using cart_t = typename shell_t::cart_array; @@ -132,14 +222,94 @@ GauXC::BasisSet exachem::scf::gauxc::make_gauxc_basis(const libint2::Bas std::copy(shell.contr[0].coeff.begin(), shell.contr[0].coeff.end(), coeff_array.begin()); std::copy(shell.O.begin(), shell.O.end(), origin.begin()); +#if defined(GAUXC_HAS_DEVICE) + gauxc_basis.emplace_back(GauXC::PrimSize(shell.alpha.size()), + GauXC::AngularMomentum(shell.contr[0].l), GauXC::SphericalType(false), + prim_array, coeff_array, origin, false); +#else gauxc_basis.emplace_back( GauXC::PrimSize(shell.alpha.size()), GauXC::AngularMomentum(shell.contr[0].l), GauXC::SphericalType(shell.contr[0].pure), prim_array, coeff_array, origin, false); +#endif } // gauxc_basis.generate_shell_to_ao(); + for(auto& sh: gauxc_basis) sh.set_shell_tolerance(basis_tol); + return gauxc_basis; } +template +void exachem::scf::gauxc::compute_exx(ExecutionContext& ec, ChemEnv& chem_env, SCFVars& scf_vars, + exachem::scf::TAMMTensors& ttensors, + exachem::scf::EigenTensors& etensors, + GauXC::XCIntegrator& xc_integrator) { + const SystemData& sys_data = chem_env.sys_data; + const SCFOptions& scf_options = chem_env.ioptions.scf_options; + exachem::scf::SCFCompute scf_compute; + + Scheduler sch{ec}; + const TiledIndexSpace& tAO = scf_vars.tAO; + auto [mu, nu] = tAO.labels<2>("all"); + + const bool is_uhf = sys_data.is_unrestricted; + const bool is_rhf = sys_data.is_restricted; + auto rank0 = ec.pg().rank() == 0; + auto factor = is_rhf ? 0.5 * scf_vars.xHF : scf_vars.xHF; + + GauXC::IntegratorSettingsSNLinK sn_link_settings; + sn_link_settings.energy_tol = scf_options.xc_snK_etol; + sn_link_settings.k_tol = scf_options.xc_snK_ktol; + +#ifdef GAUXC_HAS_DEVICE + Matrix& D_alpha = etensors.D_alpha_cart; + Matrix& K_alpha = etensors.VXC_alpha_cart; + Matrix& D_beta = etensors.D_beta_cart; + Matrix& K_beta = etensors.VXC_beta_cart; + const libint2::BasisSet& shells = chem_env.shells; + scf_compute.compute_sdens_to_cdens(shells, etensors.D_alpha, D_alpha, etensors); + if(is_uhf) + scf_compute.compute_sdens_to_cdens(shells, etensors.D_beta, D_beta, etensors); +#else + Matrix& D_alpha = etensors.D_alpha; + Matrix& K_alpha = etensors.G_alpha; + Matrix& D_beta = etensors.D_beta; + Matrix& K_beta = etensors.G_beta; +#endif + + K_alpha = xc_integrator.eval_exx(factor * D_alpha, sn_link_settings); +#ifdef GAUXC_HAS_DEVICE + scf_compute.compute_cpot_to_spot(shells, etensors.G_alpha, K_alpha, etensors); + K_alpha.resize(0, 0); + D_alpha.resize(0, 0); +#endif + if(rank0) eigen_to_tamm_tensor(ttensors.F_alpha_tmp, etensors.G_alpha); + ec.pg().barrier(); + + // clang-format off + sch + (ttensors.F_alpha(mu, nu) -= 0.5*ttensors.F_alpha_tmp(mu, nu)) + (ttensors.F_alpha(mu, nu) -= 0.5*ttensors.F_alpha_tmp(nu, mu)).execute(); + // clang-format on + + if(is_uhf) { + K_beta = xc_integrator.eval_exx(factor * D_beta, sn_link_settings); +#ifdef GAUXC_HAS_DEVICE + scf_compute.compute_cpot_to_spot(shells, etensors.G_beta, K_beta, etensors); + K_beta.resize(0, 0); + D_beta.resize(0, 0); +#endif + + if(rank0) eigen_to_tamm_tensor(ttensors.F_beta_tmp, etensors.G_beta); + ec.pg().barrier(); + + // clang-format off + sch + (ttensors.F_beta(mu, nu) -= 0.5*ttensors.F_beta_tmp(mu, nu)) + (ttensors.F_beta(mu, nu) -= 0.5*ttensors.F_beta_tmp(nu, mu)).execute(); + // clang-format on + } +} + template TensorType exachem::scf::gauxc::compute_xcf(ExecutionContext& ec, ChemEnv& chem_env, exachem::scf::TAMMTensors& ttensors, @@ -147,24 +317,53 @@ TensorType exachem::scf::gauxc::compute_xcf(ExecutionContext& ec, ChemEnv& chem_ GauXC::XCIntegrator& xc_integrator) { SystemData& sys_data = chem_env.sys_data; - const bool is_uhf = sys_data.is_unrestricted; - const bool is_rhf = sys_data.is_restricted; - auto rank0 = ec.pg().rank() == 0; + const bool is_uhf = sys_data.is_unrestricted; + const bool is_rhf = sys_data.is_restricted; + auto rank0 = ec.pg().rank() == 0; + exachem::scf::SCFCompute scf_compute; + + double EXC{}; - double EXC{}; +#ifdef GAUXC_HAS_DEVICE + Matrix& D_alpha = etensors.D_alpha_cart; + Matrix& vxc_alpha = etensors.VXC_alpha_cart; + Matrix& D_beta = etensors.D_beta_cart; + Matrix& vxc_beta = etensors.VXC_beta_cart; + const libint2::BasisSet& shells = chem_env.shells; + scf_compute.compute_sdens_to_cdens(shells, etensors.D_alpha, D_alpha, etensors); + if(is_uhf) + scf_compute.compute_sdens_to_cdens(shells, etensors.D_beta, D_beta, etensors); +#else + Matrix& D_alpha = etensors.D_alpha; Matrix& vxc_alpha = etensors.G_alpha; + Matrix& D_beta = etensors.D_beta; Matrix& vxc_beta = etensors.G_beta; +#endif if(is_rhf) { - std::tie(EXC, vxc_alpha) = xc_integrator.eval_exc_vxc(0.5 * etensors.D_alpha); - if(rank0) eigen_to_tamm_tensor(ttensors.VXC_alpha, vxc_alpha); + std::tie(EXC, vxc_alpha) = xc_integrator.eval_exc_vxc(0.5 * D_alpha); +#ifdef GAUXC_HAS_DEVICE + scf_compute.compute_cpot_to_spot(shells, etensors.G_alpha, vxc_alpha, etensors); + vxc_alpha.resize(0, 0); + D_alpha.resize(0, 0); +#endif + if(rank0) eigen_to_tamm_tensor(ttensors.VXC_alpha, etensors.G_alpha); } else if(is_uhf) { - std::tie(EXC, vxc_alpha, vxc_beta) = xc_integrator.eval_exc_vxc( - (etensors.D_alpha + etensors.D_beta), (etensors.D_alpha - etensors.D_beta)); + std::tie(EXC, vxc_alpha, vxc_beta) = + xc_integrator.eval_exc_vxc((D_alpha + D_beta), (D_alpha - D_beta)); +#ifdef GAUXC_HAS_DEVICE + scf_compute.compute_cpot_to_spot(shells, etensors.G_alpha, vxc_alpha, etensors); + scf_compute.compute_cpot_to_spot(shells, etensors.G_beta, vxc_beta, etensors); + vxc_alpha.resize(0, 0); + vxc_beta.resize(0, 0); + D_alpha.resize(0, 0); + D_beta.resize(0, 0); +#endif + if(rank0) { - eigen_to_tamm_tensor(ttensors.VXC_alpha, vxc_alpha); - eigen_to_tamm_tensor(ttensors.VXC_beta, vxc_beta); + eigen_to_tamm_tensor(ttensors.VXC_alpha, etensors.G_alpha); + eigen_to_tamm_tensor(ttensors.VXC_beta, etensors.G_beta); } } ec.pg().barrier(); @@ -176,4 +375,10 @@ template double exachem::scf::gauxc::compute_xcf( ExecutionContext& ec, ChemEnv& chem_env, exachem::scf::TAMMTensors& ttensors, exachem::scf::EigenTensors& etensors, GauXC::XCIntegrator& xc_integrator); +template void exachem::scf::gauxc::compute_exx(ExecutionContext& ec, ChemEnv& chem_env, + SCFVars& scf_vars, + exachem::scf::TAMMTensors& ttensors, + exachem::scf::EigenTensors& etensors, + GauXC::XCIntegrator& xc_integrator); + #endif diff --git a/exachem/scf/scf_gauxc.hpp b/exachem/scf/scf_gauxc.hpp index 63c6562..361301b 100644 --- a/exachem/scf/scf_gauxc.hpp +++ b/exachem/scf/scf_gauxc.hpp @@ -9,6 +9,7 @@ #if defined(USE_GAUXC) #include "common/chemenv.hpp" #include "common/options/input_options.hpp" +#include "scf/scf_compute.hpp" #include "scf/scf_eigen_tensors.hpp" #include "scf/scf_tamm_tensors.hpp" #include "scf/scf_vars.hpp" @@ -20,12 +21,17 @@ setup_gauxc(ExecutionContext& ec, const ChemEnv& chem_env, const SCFVars& scf_va GauXC::Molecule make_gauxc_molecule(const std::vector& atoms); -GauXC::BasisSet make_gauxc_basis(const libint2::BasisSet& basis); +GauXC::BasisSet make_gauxc_basis(const libint2::BasisSet& basis, const double basis_tol); template TensorType compute_xcf(ExecutionContext& ec, ChemEnv& chem_env, exachem::scf::TAMMTensors& ttensors, exachem::scf::EigenTensors& etensors, GauXC::XCIntegrator& xc_integrator); +template +void compute_exx(ExecutionContext& ec, ChemEnv& chem_env, SCFVars& scf_vars, + exachem::scf::TAMMTensors& ttensors, exachem::scf::EigenTensors& etensors, + GauXC::XCIntegrator& xc_integrator); + } // namespace exachem::scf::gauxc #endif diff --git a/exachem/scf/scf_guess.cpp b/exachem/scf/scf_guess.cpp index e61b86d..4f404f8 100644 --- a/exachem/scf/scf_guess.cpp +++ b/exachem/scf/scf_guess.cpp @@ -1462,13 +1462,15 @@ void exachem::scf::SCFGuess::compute_sad_guess(ExecutionContext& ec, ChemEnv& ch if((scf_vars.do_dens_fit && !scf_vars.direct_df) && rank == 0) { etensors.G_alpha.resize(0, 0); } - if(scf_vars.do_dens_fit && !(scf_vars.direct_df || chem_env.sys_data.is_ks)) { + if(scf_vars.do_dens_fit && + !(scf_vars.direct_df || chem_env.sys_data.is_ks || chem_env.sys_data.do_snK)) { etensors.D_alpha.resize(0, 0); if(is_uhf) etensors.D_beta.resize(0, 0); } // needed only for 4c HF - if(rank != 0 && (!scf_vars.do_dens_fit || scf_vars.direct_df || chem_env.sys_data.is_ks)) { + if(rank != 0 && (!scf_vars.do_dens_fit || scf_vars.direct_df || chem_env.sys_data.is_ks || + chem_env.sys_data.do_snK)) { tamm_to_eigen_tensor(ttensors.D_alpha, etensors.D_alpha); if(is_uhf) { tamm_to_eigen_tensor(ttensors.D_beta, etensors.D_beta); } } diff --git a/exachem/scf/scf_hartree_fock.cpp b/exachem/scf/scf_hartree_fock.cpp index c0417f6..c6b5628 100644 --- a/exachem/scf/scf_hartree_fock.cpp +++ b/exachem/scf/scf_hartree_fock.cpp @@ -88,12 +88,30 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e // SCFVars scf_vars; // init vars scf_vars.lshift = chem_env.ioptions.scf_options.lshift; - if(rank == 0) { - const double fock_precision = - std::min(chem_env.ioptions.scf_options.tol_sch, 1e-2 * chem_env.ioptions.scf_options.conve); - if(fock_precision < chem_env.ioptions.scf_options.tol_sch) - cout << "Resetting tol_sch to " << fock_precision << endl; + const double fock_precision = + std::min(chem_env.ioptions.scf_options.tol_sch, 1e-2 * chem_env.ioptions.scf_options.conve); + if(fock_precision < chem_env.ioptions.scf_options.tol_sch) { + chem_env.ioptions.scf_options.tol_sch = fock_precision; + if(rank == 0) cout << "Resetting tol_sch to " << fock_precision << endl; + } +#if defined(USE_GAUXC) + if(chem_env.ioptions.scf_options.snK) { + if(chem_env.ioptions.scf_options.xc_snK_etol > chem_env.ioptions.scf_options.conve) { + chem_env.ioptions.scf_options.xc_snK_etol = chem_env.ioptions.scf_options.conve; + if(rank == 0) + cout << "Resetting xc_snK_etol to " << chem_env.ioptions.scf_options.conve << endl; + } + if(chem_env.ioptions.scf_options.xc_snK_ktol > fock_precision) { + chem_env.ioptions.scf_options.xc_snK_ktol = fock_precision; + if(rank == 0) cout << "Resetting xc_snK_ktol to " << fock_precision << endl; + } + } + if(chem_env.ioptions.scf_options.xc_basis_tol > chem_env.ioptions.scf_options.conve) { + chem_env.ioptions.scf_options.xc_basis_tol = fock_precision; + if(rank == 0) + cout << "Resetting xc_basis_tol to " << chem_env.ioptions.scf_options.conve << endl; } +#endif // Compute Nuclear repulsion energy. auto [nelectrons, enuc] = scf_compute.compute_NRE(exc, chem_env.atoms); @@ -234,21 +252,25 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e #if defined(USE_GAUXC) double xHF; std::shared_ptr> gauxc_integrator_ptr; - if(chem_env.sys_data.is_ks) + if(chem_env.sys_data.is_ks || chem_env.sys_data.do_snK) std::tie(gauxc_integrator_ptr, xHF) = scf::gauxc::setup_gauxc(ec, chem_env, scf_vars); else xHF = 1.0; - auto gauxc_integrator = chem_env.sys_data.is_ks + auto gauxc_integrator = (chem_env.sys_data.is_ks || chem_env.sys_data.do_snK) ? GauXC::XCIntegrator(std::move(*gauxc_integrator_ptr)) : GauXC::XCIntegrator(); + scf_vars.xHF = xHF; if(rank == 0) cout << "HF exch = " << xHF << endl; #else const double xHF = 1.; #endif + // Compute SPH<->CART transformation + scf_compute.compute_trafo(chem_env.shells, etensors); + scf_vars.direct_df = do_density_fitting && chem_env.ioptions.scf_options.direct_df; - if(scf_vars.direct_df && xHF != 0.0) { + if(scf_vars.direct_df && xHF != 0.0 && !chem_env.sys_data.do_snK) { if(rank == 0) { - cout << "Direct DF cannot be used for xHF != 0.0" << endl; + cout << "Direct DF cannot be used without snK and xHF != 0.0" << endl; cout << "Falling back to in-core DF" << endl; } scf_vars.direct_df = false; @@ -381,8 +403,9 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e } #endif - if(!do_density_fitting || scf_vars.direct_df || chem_env.sys_data.is_ks) { - // needed for 4c HF only + if(!do_density_fitting || scf_vars.direct_df || chem_env.sys_data.is_ks || + chem_env.sys_data.do_snK) { + // needed for 4c HF, direct_df, KS, or snK etensors.D_alpha = Matrix::Zero(N, N); etensors.G_alpha = Matrix::Zero(N, N); if(chem_env.sys_data.is_unrestricted) { @@ -482,7 +505,8 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e if(chem_env.ioptions.scf_options.restart || chem_env.ioptions.scf_options.noscf) { // This was originally scf_restart.restart() scf_restart(ec, chem_env, scalapack_info, ttensors, etensors, files_prefix); - if(!do_density_fitting || scf_vars.direct_df || chem_env.sys_data.is_ks) { + if(!do_density_fitting || scf_vars.direct_df || chem_env.sys_data.is_ks || + chem_env.sys_data.do_snK) { tamm_to_eigen_tensor(ttensors.D_alpha, etensors.D_alpha); if(chem_env.sys_data.is_unrestricted) { tamm_to_eigen_tensor(ttensors.D_beta, etensors.D_beta); @@ -609,6 +633,21 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e shell2bf, SchwarzK, max_nprim4, ttensors, etensors, is_3c_init, do_density_fitting, xHF); +#if defined(USE_GAUXC) + // Add snK contribution + if(chem_env.sys_data.do_snK) { + const auto snK_start = std::chrono::high_resolution_clock::now(); + scf::gauxc::compute_exx(ec, chem_env, scf_vars, ttensors, etensors, + gauxc_integrator); + const auto snK_stop = std::chrono::high_resolution_clock::now(); + const auto snK_time = + std::chrono::duration_cast>((snK_stop - snK_start)).count(); + auto debug = chem_env.ioptions.scf_options.debug; + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "snK: " << snK_time << "s, "; + } +#endif + if(sys_data.is_restricted) { // clang-format off sch @@ -646,7 +685,8 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e const auto xcf_time = std::chrono::duration_cast>((xcf_stop - xcf_start)).count(); auto debug = chem_env.ioptions.scf_options.debug; - if(rank == 0 && debug) std::cout << "xcf:" << xcf_time << "s, "; + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "xcf: " << xcf_time << "s, "; } ehf += gauxc_exc; @@ -773,7 +813,7 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e // if(rank==0) cout << "D at the end of iteration: " << endl << std::setprecision(6) << // etensors.D_alpha << endl; - if(chem_env.ioptions.scf_options.writem % iter == 0 || + if(iter % chem_env.ioptions.scf_options.writem == 0 || chem_env.ioptions.scf_options.writem == 1) { scf_output.rw_md_disk(ec, chem_env, scalapack_info, ttensors, etensors, files_prefix); } diff --git a/exachem/scf/scf_iter.cpp b/exachem/scf/scf_iter.cpp index 19df75a..ade56f7 100644 --- a/exachem/scf/scf_iter.cpp +++ b/exachem/scf/scf_iter.cpp @@ -23,6 +23,7 @@ std::tuple exachem::scf::SCFIter::scf_iter_body( const bool is_uhf = sys_data.is_unrestricted; const bool is_rhf = sys_data.is_restricted; + const bool do_snK = sys_data.do_snK; const double lshift = scf_vars.lshift; Tensor& H1 = ttensors.H1; @@ -57,6 +58,19 @@ std::tuple exachem::scf::SCFIter::scf_iter_body( double ehf = 0.0; +#if defined(USE_GAUXC) + if(do_snK) { + const auto snK_start = std::chrono::high_resolution_clock::now(); + scf::gauxc::compute_exx(ec, chem_env, scf_vars, ttensors, etensors, + gauxc_integrator); + const auto snK_stop = std::chrono::high_resolution_clock::now(); + const auto snK_time = + std::chrono::duration_cast>((snK_stop - snK_start)).count(); + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "snK: " << snK_time << "s, "; + } +#endif + if(is_rhf) { // clang-format off sch @@ -93,7 +107,8 @@ std::tuple exachem::scf::SCFIter::scf_iter_body( const auto xcf_stop = std::chrono::high_resolution_clock::now(); const auto xcf_time = std::chrono::duration_cast>((xcf_stop - xcf_start)).count(); - if(rank == 0 && debug) std::cout << "xcf:" << xcf_time << "s, "; + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "xcf: " << xcf_time << "s, "; } ehf += gauxc_exc; @@ -660,8 +675,9 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& const libint2::BasisSet& obs = chem_env.shells; - const bool is_uhf = sys_data.is_unrestricted; - const bool is_spherical = (scf_options.gaussian_type == "spherical"); + const bool debug = scf_options.debug; + const bool is_uhf = sys_data.is_unrestricted; + // const bool is_spherical = (scf_options.gaussian_type == "spherical"); auto rank = ec.pg().rank(); @@ -676,7 +692,9 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& double fock_precision = std::min(scf_options.tol_sch, 1e-2 * scf_options.conve); const auto& unitshell = libint2::Shell::unit(); - auto engine = libint2::Engine(libint2::Operator::coulomb, dfbs.max_nprim(), dfbs.max_l(), 0); + auto engine = libint2::Engine(libint2::Operator::coulomb, + std::max(dfbs.max_nprim(), obs.max_nprim()), + std::max(obs.max_l(), dfbs.max_l()), 0); engine.set(libint2::BraKet::xs_xx); engine.set_precision(engine_precision); const auto& buf = engine.results(); @@ -686,8 +704,8 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& Matrix& G = etensors.G_alpha; auto& dfNorm = etensors.dfNorm; - auto shblk = is_spherical ? 2 * obs.max_l() + 1 : ((obs.max_l() + 1) * (obs.max_l() + 2)) / 2; - Matrix Dblk(shblk, shblk); + // auto shblk = is_spherical ? 2 * obs.max_l() + 1 : ((obs.max_l() + 1) * (obs.max_l() + 2)) / + // 2; Matrix Dblk(shblk, shblk); Matrix Jtmp = Matrix::Zero(1, ndf); Tensor& F_dummy = ttensors.F_dummy; @@ -696,6 +714,8 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& TiledIndexSpace tdummy{dummy}; Tensor Jtmp_tamm{tdummy, scf_vars.tdfAO}, Xtmp_tamm{tdummy, scf_vars.tdfAO}; + const auto buildJ_start = std::chrono::high_resolution_clock::now(); + sch.allocate(Jtmp_tamm, Xtmp_tamm).execute(); sch(Jtmp_tamm() = 0.0).execute(); @@ -733,8 +753,8 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& if(Norm12 * dfNorm.maxCoeff() < fock_precision) return; - Dblk.block(0, 0, n1, n2) = Jfactor * D.block(bf1_first, bf2_first, n1, n2); - if(is_uhf) Dblk.block(0, 0, n1, n2) += Jfactor * D_beta.block(bf1_first, bf2_first, n1, n2); + Matrix Dblk = D.block(bf1_first, bf2_first, n1, n2); + if(is_uhf) Dblk.block(0, 0, n1, n2) += D_beta.block(bf1_first, bf2_first, n1, n2); auto sp_iter = spdata.at(0).begin(); for(decltype(s1) s3 = 0; s3 < dfbs.size(); ++s3) { @@ -745,17 +765,16 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& if(Norm12 * dfNorm(s3) < fock_precision) continue; + engine.prescale_by(Jfactor); engine.compute2(dfbs[s3], unitshell, obs[s1], obs[s2], sp, sp12); const auto* buf_312 = buf[0]; if(buf_312 == nullptr) continue; - for(decltype(n1) f3 = 0, f312 = 0; f3 != n3; ++f3) { - const auto bf3 = f3 + bf3_first; + for(decltype(n1) bf3 = bf3_first, f312 = 0; bf3 != bf3_first + n3; ++bf3) for(decltype(n1) f1 = 0; f1 != n1; ++f1) for(decltype(n2) f2 = 0; f2 != n2; ++f2, ++f312) Jtmp(0, bf3) += buf_312[f312] * Dblk(f1, f2); - } } }; block_for(ec, F_dummy(), comp_J_lambda); @@ -763,6 +782,13 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& eigen_to_tamm_tensor_acc(Jtmp_tamm, Jtmp); ec.pg().barrier(); + // const auto buildJ_stop = std::chrono::high_resolution_clock::now(); + // const auto buildJ_time = + // std::chrono::duration_cast>((buildJ_stop - + // buildJ_start)).count(); + // if(rank == 0 && debug) std::cout << "buildJ: " << buildJ_time << "s, "; + + // const auto buildX_start = std::chrono::high_resolution_clock::now(); sch(Xtmp_tamm("i", "j") = Jtmp_tamm("i", "k") * Vm1("k", "j"))( Jtmp_tamm("i", "j") = Xtmp_tamm("i", "k") * Vm1("j", "k")) .deallocate(Xtmp_tamm) @@ -771,11 +797,15 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& Jtmp.setZero(); tamm_to_eigen_tensor(Jtmp_tamm, Jtmp); ec.pg().barrier(); - Tensor::deallocate(Jtmp_tamm); - - Eigen::VectorXd Jvec(Eigen::Map(Jtmp.data(), Jtmp.cols())); - + // const auto buildX_stop = std::chrono::high_resolution_clock::now(); + // const auto buildX_time = + // std::chrono::duration_cast>((buildX_stop - + // buildX_start)).count(); + // if(rank == 0 && debug) std::cout << "buildX: " << buildX_time << "s, "; + + // const auto buildF_start = std::chrono::high_resolution_clock::now(); + Eigen::VectorXd Jvec(Eigen::Map(Jtmp.data(), Jtmp.cols())); Eigen::Vector Jnorm; Jnorm.resize(dfbs.size()); for(size_t s3 = 0; s3 < dfbs.size(); ++s3) { @@ -783,6 +813,9 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& Jvec.segment(shell2bf_df[s3], dfbs[s3].size()).lpNorm() * dfNorm(s3); } + // Scale JVEC to account for permutational symmetry + Jvec *= 2.0; + auto comp_df_lambda = [&](IndexVector blockid) { auto s1 = blockid[0]; auto bf1_first = shell2bf[s1]; @@ -801,11 +834,10 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& const auto* sp12 = sp12_iter->get(); const auto Norm12 = SchwarzK(s1, s2); - const auto Jfactor = (s1 == s2) ? 1.0 : 2.0; - + const auto Jfactor = (s1 == s2) ? 0.5 : 1.0; if(Norm12 * Jnorm.maxCoeff() < fock_precision) return; - auto Xvec = Jfactor * Jvec; + Matrix J12 = Matrix::Zero(n1, n2); auto sp_iter = spdata.at(0).begin(); for(decltype(s1) s3 = 0; s3 < dfbs.size(); ++s3) { @@ -815,22 +847,17 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& ++sp_iter; if(Norm12 * Jnorm(s3) < fock_precision) continue; + engine.prescale_by(Jfactor); engine.compute2(dfbs[s3], unitshell, obs[s1], obs[s2], sp, sp12); const auto* buf_312 = buf[0]; if(buf_312 == nullptr) continue; - for(decltype(n1) f3 = 0, f312 = 0; f3 != n3; ++f3) { - const auto bf3 = f3 + bf3_first; - for(decltype(n1) f1 = 0; f1 != n1; ++f1) { - const auto bf1 = f1 + bf1_first; - for(decltype(n2) f2 = 0; f2 != n2; ++f2, ++f312) { - const auto bf2 = f2 + bf2_first; - G(bf1, bf2) += buf_312[f312] * Xvec(bf3); - } - } - } + for(decltype(n1) bf3 = bf3_first, f312 = 0; bf3 != bf3_first + n3; ++bf3) + for(decltype(n1) f1 = 0; f1 != n1; ++f1) + for(decltype(n2) f2 = 0; f2 != n2; ++f2, ++f312) J12(f1, f2) += buf_312[f312] * Jvec(bf3); } + G.block(bf1_first, bf2_first, n1, n2) = J12; }; block_for(ec, F_dummy(), comp_df_lambda); @@ -838,6 +865,17 @@ void exachem::scf::SCFIter::compute_2bf_ri_direct(ExecutionContext& ec, ChemEnv& ec.pg().barrier(); if(is_uhf) sch(ttensors.F_beta_tmp() = ttensors.F_alpha_tmp()).execute(); + + const auto buildF_stop = std::chrono::high_resolution_clock::now(); + // const auto buildF_time = + // std::chrono::duration_cast>((buildF_stop - + // buildF_start)).count(); + // if(rank == 0 && debug) std::cout << "buildF: " << buildF_time << "s, "; + + const auto total_time = + std::chrono::duration_cast>((buildF_stop - buildJ_start)).count(); + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "DF-J: " << total_time << "s, "; }; template @@ -851,6 +889,7 @@ void exachem::scf::SCFIter::compute_2bf_ri(ExecutionContext& ec, ChemEnv& chem_e const bool is_uhf = sys_data.is_unrestricted; const bool is_rhf = sys_data.is_restricted; + const bool do_snK = sys_data.do_snK; auto rank = ec.pg().rank(); auto debug = scf_options.debug; @@ -898,7 +937,7 @@ void exachem::scf::SCFIter::compute_2bf_ri(ExecutionContext& ec, ChemEnv& chem_e if(rank == 0 && debug) std::cout << " J: " << std::fixed << std::setprecision(2) << igtime << "s, "; - if(xHF > 0.0) { + if(xHF > 0.0 && !do_snK) { ig1 = std::chrono::high_resolution_clock::now(); TensorType factor = is_rhf ? 0.5 : 1.0; @@ -946,7 +985,8 @@ void exachem::scf::SCFIter::compute_2bf(ExecutionContext& ec, ChemEnv& chem_env, const bool is_uhf = sys_data.is_unrestricted; const bool is_rhf = sys_data.is_restricted; - const bool doK = xHF != 0.0; + const bool do_snK = sys_data.do_snK; + const bool doK = xHF != 0.0 && !do_snK; const bool is_spherical = (scf_options.gaussian_type == "spherical"); Matrix& G = etensors.G_alpha; @@ -1051,7 +1091,7 @@ void exachem::scf::SCFIter::compute_2bf(ExecutionContext& ec, ChemEnv& chem_env, if(do_schwarz_screen && Dnorm1234 * SchwarzK(s1, s2) * SchwarzK(s3, s4) < fock_precision) continue; - if(xHF == 0.0) { + if(!doK) { if(do_schwarz_screen && Dnorm12 * SchwarzK(s1, s2) * SchwarzK(s3, s4) < fock_precision && Dnorm34 * SchwarzK(s1, s2) * SchwarzK(s3, s4) < fock_precision) continue; diff --git a/exachem/scf/scf_vars.hpp b/exachem/scf/scf_vars.hpp index fe002a4..145aeef 100644 --- a/exachem/scf/scf_vars.hpp +++ b/exachem/scf/scf_vars.hpp @@ -18,11 +18,13 @@ class SCFVars { bool switch_diis = false; double exc = 0.0; double eqed = 0.0; + bool do_snK = false; bool do_dens_fit = false; bool direct_df = false; bool do_load_bal = false; bool lshift_reset = false; double lshift = 0; + double xHF = 1.0; libecpint::ECPIntegrator ecp_factory; // AO spaces diff --git a/inputs/example.json b/inputs/example.json index eecfd7f..437986b 100644 --- a/inputs/example.json +++ b/inputs/example.json @@ -48,19 +48,36 @@ "damp": 100, "nnodes": 1, "writem": 10, + "debug": false, "restart": false, "noscf": false, "scf_type": "restricted", - "xc_type": [], - "xc_grid_type": "UltraFine", - "debug": false, + "direct_df": false, + "DFT": { + "snK": false, + "xc_type": [], + "xc_grid_type": "UltraFine", + "xc_pruning_scheme": "Robust", + "xc_rad_quad": "MK", + "xc_weight_scheme": "SSF", + "xc_exec_space": "Host", + "xc_basis_tol": 1e-10, + "xc_batch_size": 512, + "xc_snK_etol": 1e-10, + "xc_snK_ktol": 1e-10, + "xc_lb_kernel": "Default", + "xc_mw_kernel": "Default", + "xc_int_kernel": "Default", + "xc_red_kernel": "Default", + "xc_lwd_kernel": "Default", + "xc_radang_size": [0, 0] + }, "restart_size": 2000, "scalapack_nb": 256, "scalapack_np_row": 0, "scalapack_np_col": 0, "n_lindep": 0, "moldenfile": "", - "direct_df": false, "guess": { "atom_options":{ "O": [0,1],