From 66d2dff1b30038e9d951a3fe499d80cf58144fe2 Mon Sep 17 00:00:00 2001
From: Ajay Panyala <ajay.panyala@gmail.com>
Date: Fri, 20 Sep 2024 00:39:41 -0700
Subject: [PATCH] cleanup scalapack logic, update CC script

---
 .github/workflows/c-cpp.yaml             |  4 +-
 exachem/cc/scripts/ccsd_advisor.py       | 38 ++++++++--
 exachem/common/cutils.cpp                | 92 +++++++++++-------------
 exachem/common/cutils.hpp                | 36 +++++-----
 exachem/common/options/input_options.hpp |  2 +-
 exachem/scf/scf_hartree_fock.cpp         | 20 ++----
 exachem/scf/scf_iter.cpp                 |  4 +-
 exachem/scf/scf_outputs.cpp              |  2 +-
 inputs/scripts/nwchem_to_exachem.py      |  4 +-
 inputs/scripts/xyz_to_exachem.py         |  2 -
 methods/ExaChem.cpp                      |  5 +-
 11 files changed, 107 insertions(+), 102 deletions(-)

diff --git a/.github/workflows/c-cpp.yaml b/.github/workflows/c-cpp.yaml
index 1cafe9d..87fe9f9 100644
--- a/.github/workflows/c-cpp.yaml
+++ b/.github/workflows/c-cpp.yaml
@@ -150,8 +150,8 @@ jobs:
       if: ${{ matrix.os[1] == 'macos' }}
       shell: bash
       run: |
-        echo "CC=gcc-13" >> $GITHUB_ENV
-        echo "CXX=g++-13" >> $GITHUB_ENV
+        echo "CC=gcc-14" >> $GITHUB_ENV
+        echo "CXX=g++-14" >> $GITHUB_ENV
         echo "LA_VENDOR=OpenBLAS" >> $GITHUB_ENV
 
     - name: Cache install steps (backend = ga)
diff --git a/exachem/cc/scripts/ccsd_advisor.py b/exachem/cc/scripts/ccsd_advisor.py
index 8267be1..f5ea695 100644
--- a/exachem/cc/scripts/ccsd_advisor.py
+++ b/exachem/cc/scripts/ccsd_advisor.py
@@ -5,6 +5,27 @@
 
 #python ccsd_advisor.py -oa 99 -ob 94 -va 394 -vb 399 -cv 4027 -ppn 4 -ram 512 -ctype uhf -diis 5 -nranks 10 -cache 8 -ts 32
 
+def get_mo_tiles(noa,nob,nva,nvb,ts):
+    '''Tile sizes obtained by splitting noa, then nva, for tilesize ts.
+
+    Each count n is cut into ceil(n/ts) tiles whose sizes differ by at
+    most one, with the larger tiles listed first. The returned list holds
+    the noa tiles followed by the nva tiles.
+
+    nob and nvb are accepted but not used; the beta splits are not part
+    of the result.
+    '''
+    def split(count):
+        # the first (count % ntiles) tiles each take one extra element
+        ntiles = math.ceil(1.0 * count / ts)
+        return [int(count / ntiles + (k < (count % ntiles))) for k in range(0, ntiles)]
+
+    # noa tiles first, then nva tiles
+    tile_sizes = split(noa)
+    tile_sizes.extend(split(nva))
+    return tile_sizes
+
+
 def parseargs(argv=None):
 
     '''Command line options.'''
@@ -202,14 +223,13 @@ def parseargs(argv=None):
 print("nbf: "    + str(nbf))
 
 print("\nTotal CPU memory required for Cholesky decomp of the 2e integrals: " + str(chol_mem) + " GiB")
-print("\nTotal CPU memory required for CCSD calculation: " + str(ccsd_mem) + " GiB")
+print("\nTotal CPU memory required for CCSD calculation: " + str(ccsd_mem) + " GiB\n")
 
 
 VabOab = v_alpha*o_beta*v_beta*o_alpha
 ts_guess=50
 ts_max=ts_guess
-tilesizes = list(range(ts_guess, 501, 5))
-tilesizes.insert(0,73)
+tilesizes = list(range(ts_guess, 501, 10))
 
 def get_ts_recommendation(tilesizes,nranks):
     ts_guess_ = tilesizes[0]
@@ -217,9 +237,10 @@ def get_ts_recommendation(tilesizes,nranks):
     nblocks_  = 10
     for ts in tilesizes:
         nblocks = math.ceil(v_alpha/ts) * math.ceil(o_alpha/ts) * math.ceil(v_beta/ts) * math.ceil(o_beta/ts)
-        # print("nblocks %s for TS = %s " %(nblocks,ts))
+        # print ("  --> MO Tiles for tilesize %s, nblocks=%s: %s" %(ts, nblocks, get_mo_tiles(o_alpha,o_beta,v_alpha,v_beta,ts)))
         ts_max_ = ts
-        if nblocks <= nranks:
+        #nblocks <= nranks
+        if (nblocks*1.0/nranks) < 0.31:
             ts_max_ = ts_guess_
             break
         ts_guess_=ts
@@ -229,12 +250,16 @@ def get_ts_recommendation(tilesizes,nranks):
 
 [ts_max,nblocks] = get_ts_recommendation(tilesizes,nranks)
 print("Min #nodes required = %s, nranks = %s, nblocks = %s, max tilesize = %s" %(nnodes, nranks, nblocks, ts_max))
+# print ("  --> MO Tiles = %s" %(get_mo_tiles(o_alpha,o_beta,v_alpha,v_beta,ts_max)))
 
 nodecounts = list(range(nnodes+10, nnodes*10+1, 10))
 for nc in nodecounts:
+    # print ("-----------------------------------------------------------------------")
     [ts_max,nblocks] = get_ts_recommendation(tilesizes,nc*ppn)
-    if nblocks <= nc*ppn: break
+    # stop once nblocks/(nc*ppn) < 0.31, the same cutoff get_ts_recommendation applies to nranks
+    if (nblocks*1.0/(nc*ppn)) < 0.31: break
     print("For node count = %s, nranks = %s, nblocks = %s, max tilesize = %s" %(nc, nc*ppn, nblocks, ts_max))
+    # print ("  --> MO Tiles = %s" %(get_mo_tiles(o_alpha,o_beta,v_alpha,v_beta,ts_max)))
 
 
 # (T)
@@ -326,6 +351,7 @@ def ft_mem(i,j,k,l) :
 total_extra_buf_mem = round(total_extra_buf_mem,2)
 
 total_ccsd_t_mem = ccsd_t_mem + total_extra_buf_mem + total_cache_mem
+total_ccsd_t_mem = round(total_ccsd_t_mem,2)
 
 print("\nTotal CPU memory required for (T) calculation: " + str(total_ccsd_t_mem) + " GiB" + ", Min nodes required: " + str(math.ceil(total_ccsd_t_mem/cpu_mem)))
 print("-- memory required for the input tensors: " + str(ccsd_t_mem) + " GiB")
diff --git a/exachem/common/cutils.cpp b/exachem/common/cutils.cpp
index f84325b..627e5d5 100644
--- a/exachem/common/cutils.cpp
+++ b/exachem/common/cutils.cpp
@@ -26,50 +26,56 @@ MPI_Comm get_scalapack_comm(tamm::ExecutionContext& ec, int sca_nranks) {
   return scacomm;
 }
 
-void setup_scalapack_info(ChemEnv& chem_env, ScalapackInfo& scalapack_info, MPI_Comm& scacomm) {
+void setup_scalapack_info(tamm::ExecutionContext& ec, ChemEnv& chem_env,
+                          ScalapackInfo& scalapack_info, ProcGroupData& pgdata) {
   SystemData& sys_data    = chem_env.sys_data;
   SCFOptions& scf_options = chem_env.ioptions.scf_options;
 #if defined(USE_UPCXX)
   abort(); // Not supported with UPC++
 #endif
-  scalapack_info.comm = scacomm;
-  if(scacomm == MPI_COMM_NULL) return;
-  auto blacs_setup_st = std::chrono::high_resolution_clock::now();
-  // Sanity checks
-  scalapack_info.npr   = scf_options.scalapack_np_row;
-  scalapack_info.npc   = scf_options.scalapack_np_col;
-  int scalapack_nranks = scalapack_info.npr * scalapack_info.npc;
-
-  scalapack_info.pg = ProcGroup::create_coll(scacomm);
-  scalapack_info.ec =
-    ExecutionContext{scalapack_info.pg, DistributionKind::dense, MemoryManagerKind::ga};
 
-  int sca_world_size = scalapack_info.pg.size().value();
-
-  // Default to square(ish) grid
-  if(scalapack_nranks == 0) {
-    int64_t npr = std::sqrt(sca_world_size);
-    int64_t npc = sca_world_size / npr;
-    while(npr * npc != sca_world_size) {
-      npr--;
-      npc = sca_world_size / npr;
-    }
-    scalapack_nranks   = sca_world_size;
-    scalapack_info.npr = npr;
-    scalapack_info.npc = npc;
-  }
+  scalapack_info.npr = scf_options.scalapack_np_row;
+  scalapack_info.npc = scf_options.scalapack_np_col;
+  int sca_user_ranks = scalapack_info.npr * scalapack_info.npc;
+
+  // node_p_sca = % of nodes, nbf_p_sca = 4 = % of nbf for scalapack
+  int sca_nranks = std::ceil(sys_data.nbf_orig * (4 / 100.0));
+  // if(node_p_sca > 0) sca_nranks = std::ceil((node_p_sca / 100.0) * nnodes);
+  if(sca_user_ranks > 0) sca_nranks = sca_user_ranks;
+  if(sca_nranks > pgdata.spg_nranks) sca_nranks = pgdata.spg_nranks;
+  // Find nearest square
+  sca_nranks = std::pow(std::floor(std::sqrt(sca_nranks)), 2);
+  if(sca_nranks == 0) sca_nranks = 1;
+
+  int sca_nnodes = sca_nranks / pgdata.ppn;
+  if(sca_nranks % pgdata.ppn > 0 || sca_nnodes == 0) sca_nnodes++;
+  // if(sca_nnodes > pgdata.spg_nnodes) sca_nnodes = pgdata.spg_nnodes;
+
+  pgdata.scalapack_nnodes         = sca_nnodes;
+  pgdata.scalapack_nranks         = sca_nranks;
+  scalapack_info.scalapack_nranks = sca_nranks;
+
+  // Always use square grid
+  scalapack_info.npr = std::sqrt(sca_nranks);
+  scalapack_info.npc = scalapack_info.npr;
 
-  EXPECTS(sca_world_size >= scalapack_nranks);
+  scalapack_info.comm = get_scalapack_comm(ec, sca_nranks);
 
-  if(not scalapack_nranks) scalapack_nranks = sca_world_size;
-  std::vector<int64_t> scalapack_ranks(scalapack_nranks);
+  if(scalapack_info.comm == MPI_COMM_NULL) return;
+  // auto blacs_setup_st = std::chrono::high_resolution_clock::now();
+
+  scalapack_info.pg = ProcGroup::create_coll(scalapack_info.comm);
+  scalapack_info.ec =
+    ExecutionContext{scalapack_info.pg, DistributionKind::dense, MemoryManagerKind::ga};
+
+  std::vector<int64_t> scalapack_ranks(sca_nranks);
   std::iota(scalapack_ranks.begin(), scalapack_ranks.end(), 0);
-  scalapack_info.scalapack_nranks = scalapack_nranks;
 
   int& mb_ = scf_options.scalapack_nb;
 
   if(scalapack_info.pg.rank() == 0) {
-    std::cout << "scalapack_nranks = " << scalapack_nranks << std::endl;
+    std::cout << "scalapack_nnodes = " << sca_nnodes << std::endl;
+    std::cout << "scalapack_nranks = " << sca_nranks << std::endl;
     std::cout << "scalapack_np_row = " << scalapack_info.npr << std::endl;
     std::cout << "scalapack_np_col = " << scalapack_info.npc << std::endl;
     std::cout << "scalapack_nb     = " << mb_ << std::endl;
@@ -90,9 +96,9 @@ void setup_scalapack_info(ChemEnv& chem_env, ScalapackInfo& scalapack_info, MPI_
   scalapack_info.blockcyclic_dist =
     std::make_unique<scalapackpp::BlockCyclicDist2D>(*scalapack_info.blacs_grid, mb_, mb_, 0, 0);
 
-  auto blacs_setup_en = std::chrono::high_resolution_clock::now();
+  // auto blacs_setup_en = std::chrono::high_resolution_clock::now();
 
-  std::chrono::duration<double> blacs_time = blacs_setup_en - blacs_setup_st;
+  // std::chrono::duration<double> blacs_time = blacs_setup_en - blacs_setup_st;
 
   // if(scalapack_info.pg.rank() == 0)
   //   std::cout << std::fixed << std::setprecision(2) << std::endl
@@ -103,7 +109,7 @@ void setup_scalapack_info(ChemEnv& chem_env, ScalapackInfo& scalapack_info, MPI_
 
-// Nbf, % of nodes, % of Nbf, nnodes from input file, (% of nodes, % of nbf) for scalapack
+// Nbf, % of nodes, % of Nbf, nnodes from input file
 ProcGroupData get_spg_data(ExecutionContext& ec, const size_t N, const int node_p, const int nbf_p,
-                           const int node_inp, const int node_p_sca, const int nbf_p_sca) {
+                           const int node_inp) {
   ProcGroupData pgdata;
   pgdata.nnodes = ec.nnodes();
   pgdata.ppn    = ec.ppn();
@@ -126,23 +132,5 @@ ProcGroupData get_spg_data(ExecutionContext& ec, const size_t N, const int node_
   pgdata.spg_nnodes = spg_nnodes;
   pgdata.spg_nranks = spg_nranks;
 
-#if defined(USE_SCALAPACK)
-  // Find nearest square
-  int sca_nranks = std::ceil(N * (nbf_p_sca / 100.0));
-  if(node_p_sca > 0) sca_nranks = std::ceil((node_p_sca / 100.0) * nnodes);
-  if(sca_nranks > spg_nranks) sca_nranks = spg_nranks;
-  sca_nranks = std::pow(std::floor(std::sqrt(sca_nranks)), 2);
-  if(sca_nranks == 0) sca_nranks = 1;
-  int sca_nnodes = sca_nranks / ppn;
-  if(sca_nranks % ppn > 0 || sca_nnodes == 0) sca_nnodes++;
-  if(sca_nnodes > nnodes) sca_nnodes = nnodes;
-  // if(sca_nnodes == 1) ppn = sca_nranks; // single node case
-  pgdata.scalapack_nnodes = sca_nnodes;
-  pgdata.scalapack_nranks = sca_nranks;
-#else
-  pgdata.scalapack_nnodes = spg_nnodes;
-  pgdata.scalapack_nranks = spg_nranks;
-#endif
-
   return pgdata;
 }
diff --git a/exachem/common/cutils.hpp b/exachem/common/cutils.hpp
index 16285c7..70c2fbf 100644
--- a/exachem/common/cutils.hpp
+++ b/exachem/common/cutils.hpp
@@ -53,6 +53,19 @@ struct ScalapackInfo {
 };
 #endif
 
+// Contains node, ppn information used for creating a smaller process group from world group
+struct ProcGroupData {
+  int nnodes{};     // total number of nodes
+  int spg_nnodes{}; // number of nodes in smaller process group
+  int ppn{};        // processes per node
+  int spg_nranks{}; // number of ranks in smaller process group
+  // #nodes used for scalapack operations can further be a subset of the smaller process group
+  int scalapack_nnodes{}; // number of nodes used for scalapack operations
+  int scalapack_nranks{}; // number of ranks used for scalapack operations
+  // unpack() returns only the process-group fields; the scalapack_* fields are not included.
+  auto unpack() const { return std::make_tuple(nnodes, spg_nnodes, ppn, spg_nranks); }
+};
+
 #if defined(USE_SCALAPACK)
 struct ScalapackInfo {
   int64_t                                         npr{}, npc{}, scalapack_nranks{};
@@ -66,25 +79,10 @@ struct ScalapackInfo {
 
 MPI_Comm get_scalapack_comm(tamm::ExecutionContext& ec, int sca_nranks);
 
-void setup_scalapack_info(ChemEnv& chem_env, ScalapackInfo& scalapack_info, MPI_Comm& scacomm);
+void setup_scalapack_info(tamm::ExecutionContext& ec, ChemEnv& chem_env,
+                          ScalapackInfo& scalapack_info, ProcGroupData& pgdata);
 #endif
 
-// Contains node, ppn information used for creating a smaller process group from world group
-struct ProcGroupData {
-  int nnodes{};     // total number of nodes
-  int spg_nnodes{}; // number of nodes in smaller process group
-  int ppn{};        // processes per node
-  int spg_nranks{}; // number of rank in smaller process group
-  // #nodes used for scalapack operations can further be a subset of the smaller process group
-  int scalapack_nnodes{};
-  int scalapack_nranks{};
-
-  auto unpack() {
-    return std::make_tuple(nnodes, spg_nnodes, ppn, spg_nranks, scalapack_nnodes, scalapack_nranks);
-  }
-};
-
-// Nbf, % of nodes, % of Nbf, nnodes from input file, (% of nodes, % of nbf) for scalapack
+// Nbf, % of nodes, % of Nbf, nnodes from input file
 ProcGroupData get_spg_data(ExecutionContext& ec, const size_t N, const int node_p,
-                           const int nbf_p = -1, const int node_inp = -1, const int node_p_sca = -1,
-                           const int nbf_p_sca = -1);
+                           const int nbf_p = -1, const int node_inp = -1);
diff --git a/exachem/common/options/input_options.hpp b/exachem/common/options/input_options.hpp
index 164b380..af65a4c 100644
--- a/exachem/common/options/input_options.hpp
+++ b/exachem/common/options/input_options.hpp
@@ -43,7 +43,7 @@ class SCFOptions: public CommonOptions {
   int      multiplicity{1};
   double   lshift{0};        // level shift factor, +ve value b/w 0 and 1
   double   tol_int{1e-22};   // tolerance for integral primitive screening
-  double   tol_sch{1e-10};   // tolerance for schwarz screening
+  double   tol_sch{1e-12};   // tolerance for schwarz screening
   double   tol_lindep{1e-5}; // tolerance for linear dependencies
   double   conve{1e-8};      // energy convergence
   double   convd{1e-7};      // density convergence
diff --git a/exachem/scf/scf_hartree_fock.cpp b/exachem/scf/scf_hartree_fock.cpp
index 5017187..dd8310b 100644
--- a/exachem/scf/scf_hartree_fock.cpp
+++ b/exachem/scf/scf_hartree_fock.cpp
@@ -54,8 +54,8 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e
   // -------------Everythin related to Basis Sets-----------------------------
 
 #if SCF_THROTTLE_RESOURCES
-  ProcGroupData pgdata = get_spg_data(exc, N, -1, 50, chem_env.ioptions.scf_options.nnodes, -1, 4);
-  auto [t_nnodes, hf_nnodes, ppn, hf_nranks, sca_nnodes, sca_nranks] = pgdata.unpack();
+  ProcGroupData pgdata = get_spg_data(exc, N, -1, 50, chem_env.ioptions.scf_options.nnodes);
+  auto [t_nnodes, hf_nnodes, ppn, hf_nranks] = pgdata.unpack();
 
 #if defined(USE_UPCXX)
   bool         in_new_team = (rank < hf_nranks);
@@ -76,9 +76,6 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e
   MPI_Group_free(&hfgroup);
 #endif
 
-#if defined(USE_SCALAPACK)
-  MPI_Comm scacomm = get_scalapack_comm(exc, sca_nranks);
-#endif
 #endif
 
   if(rank == 0) {
@@ -86,10 +83,6 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e
 #if SCF_THROTTLE_RESOURCES
     std::cout << "Number of nodes, processes per node used for SCF calculation: " << hf_nnodes
               << ", " << ppn << std::endl;
-#endif
-#if defined(USE_SCALAPACK)
-    cout << "Number of nodes, processes per node, total processes used for Scalapack operations: "
-         << sca_nnodes << ", " << sca_nranks / sca_nnodes << ", " << sca_nranks << endl;
 #endif
     chem_env.ioptions.common_options.print();
     chem_env.ioptions.scf_options.print();
@@ -268,7 +261,8 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e
 
     ScalapackInfo scalapack_info;
 #if defined(USE_SCALAPACK)
-    setup_scalapack_info(chem_env, scalapack_info, scacomm);
+    setup_scalapack_info(ec, chem_env, scalapack_info, pgdata);
+    MPI_Comm scacomm = scalapack_info.comm;
 #endif
 
 #if defined(USE_GAUXC)
@@ -585,12 +579,12 @@ void exachem::scf::SCFHartreeFock::scf_hf(ExecutionContext& exc, ChemEnv& chem_e
       Matrix S(chem_env.sys_data.nbf_orig, chem_env.sys_data.nbf_orig);
       tamm_to_eigen_tensor(ttensors.S1, S);
       if(chem_env.sys_data.is_restricted)
-        cout << "debug #electrons       = " << (int) std::ceil((etensors.D_alpha * S).trace())
+        cout << "debug #electrons       = " << (int) std::round((etensors.D_alpha * S).trace())
              << endl;
       if(chem_env.sys_data.is_unrestricted) {
-        cout << "debug #alpha electrons = " << (int) std::ceil((etensors.D_alpha * S).trace())
+        cout << "debug #alpha electrons = " << (int) std::round((etensors.D_alpha * S).trace())
              << endl;
-        cout << "debug #beta  electrons = " << (int) std::ceil((etensors.D_beta * S).trace())
+        cout << "debug #beta  electrons = " << (int) std::round((etensors.D_beta * S).trace())
              << endl;
       }
     }
diff --git a/exachem/scf/scf_iter.cpp b/exachem/scf/scf_iter.cpp
index 5f40673..9c90cec 100644
--- a/exachem/scf/scf_iter.cpp
+++ b/exachem/scf/scf_iter.cpp
@@ -23,7 +23,6 @@ std::tuple<TensorType, TensorType> exachem::scf::SCFIter::scf_iter_body(
 
   const bool   is_uhf = sys_data.is_unrestricted;
   const bool   is_rhf = sys_data.is_restricted;
-  const bool   do_snK = sys_data.do_snK;
   const double lshift = scf_vars.lshift;
 
   Tensor<TensorType>& H1       = ttensors.H1;
@@ -59,6 +58,7 @@ std::tuple<TensorType, TensorType> exachem::scf::SCFIter::scf_iter_body(
   double ehf = 0.0;
 
 #if defined(USE_GAUXC)
+  const bool do_snK = sys_data.do_snK;
   if(do_snK) {
     const auto snK_start = std::chrono::high_resolution_clock::now();
     scf::gauxc::compute_exx<TensorType>(ec, chem_env, scf_vars, ttensors, etensors,
@@ -448,8 +448,6 @@ void exachem::scf::SCFIter::compute_2c_ints(ExecutionContext& ec, ChemEnv& chem_
 
   SCFOptions& scf_options = chem_env.ioptions.scf_options;
 
-  auto rank = ec.pg().rank();
-
   const libint2::BasisSet&   dfbs              = scf_vars.dfbs;
   const std::vector<Tile>&   dfAO_tiles        = scf_vars.dfAO_tiles;
   const std::vector<size_t>& df_shell_tile_map = scf_vars.df_shell_tile_map;
diff --git a/exachem/scf/scf_outputs.cpp b/exachem/scf/scf_outputs.cpp
index 0a078d6..bb6548e 100644
--- a/exachem/scf/scf_outputs.cpp
+++ b/exachem/scf/scf_outputs.cpp
@@ -62,7 +62,7 @@ void exachem::scf::SCFIO::print_energies(ExecutionContext& ec, ChemEnv& chem_env
   }
 
   if(ec.pg().rank() == 0) {
-    std::cout << "#electrons        = " << (int) std::ceil(nelectrons) << endl;
+    std::cout << "#electrons        = " << (int) std::round(nelectrons) << endl;
     std::cout << "1e energy kinetic = " << std::setprecision(16) << kinetic_1e << endl;
     std::cout << "1e energy N-e     = " << NE_1e << endl;
     std::cout << "1e energy         = " << energy_1e << endl;
diff --git a/inputs/scripts/nwchem_to_exachem.py b/inputs/scripts/nwchem_to_exachem.py
index 69946b9..a4e2166 100644
--- a/inputs/scripts/nwchem_to_exachem.py
+++ b/inputs/scripts/nwchem_to_exachem.py
@@ -110,7 +110,7 @@ def parse_nwchem_input(input_file):
   nwchem_opt["SCF"]["PRINT"] = {}
   scf_opt = nwchem_opt["SCF"]
   scf_opt["tol_int"] = 1e-22
-  scf_opt["tol_sch"] = 1e-10
+  scf_opt["tol_sch"] = 1e-12
   scf_opt["tol_lindep"] = 1e-5
   scf_opt["conve"] = 1e-8
   scf_opt["convd"] = 1e-7
@@ -129,7 +129,7 @@ def parse_nwchem_input(input_file):
   dft_opt = nwchem_opt["DFT"]
   dft_opt["hfexch"] = False
   dft_opt["tol_int"] = 1e-22
-  dft_opt["tol_sch"] = 1e-10
+  dft_opt["tol_sch"] = 1e-12
   dft_opt["tol_lindep"] = 1e-5
   dft_opt["conve"] = 1e-8
   dft_opt["convd"] = 1e-7
diff --git a/inputs/scripts/xyz_to_exachem.py b/inputs/scripts/xyz_to_exachem.py
index 9cc4b2a..b82546a 100644
--- a/inputs/scripts/xyz_to_exachem.py
+++ b/inputs/scripts/xyz_to_exachem.py
@@ -126,8 +126,6 @@ def dict_to_json(dictname):
 
     exachem_opt["SCF"] = {}
     scf_opt = exachem_opt["SCF"]
-    scf_opt["tol_int"] = 1e-12
-    scf_opt["tol_lindep"] = 1e-5
     scf_opt["conve"] = 1e-8
     scf_opt["convd"] = 1e-7
     scf_opt["diis_hist"] = 10
diff --git a/methods/ExaChem.cpp b/methods/ExaChem.cpp
index ae1f974..fab43b8 100644
--- a/methods/ExaChem.cpp
+++ b/methods/ExaChem.cpp
@@ -70,8 +70,11 @@ int main(int argc, char* argv[]) {
     std::cout << "nnodes: " << ec.nnodes() << ", ";
     std::cout << "nproc_per_node: " << ec.ppn() << ", ";
     std::cout << "nproc_total: " << ec.nnodes() * ec.ppn() << ", ";
+#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP)
     std::cout << "ngpus_per_node: " << ec.gpn() << ", ";
-    std::cout << "ngpus_total: " << ec.nnodes() * ec.gpn() << endl << endl;
+    std::cout << "ngpus_total: " << ec.nnodes() * ec.gpn() << endl;
+#endif
+    std::cout << std::endl;
     ec.print_mem_info();
   }