From bda098b9a74ca8b7e7ca5d1d05e8c5fc76a7e3ab Mon Sep 17 00:00:00 2001 From: Ajay Panyala Date: Tue, 19 Dec 2023 20:07:10 -0800 Subject: [PATCH] [SCF] adjust lshift based on homo-lumo gap, [CI] update reference output files --- .github/workflows/c-cpp.yaml | 62 ++-- .../ubiquitin_dgrtl.6-31g.ccsd_t.json | 335 ++++++++++++++++++ .../ubiquitin_dgrtl.cc-pvdz.ccsd_t.json | 327 +++++++++++++++++ .../ubiquitin_dgrtl.sto-3g.ccsd_t.json | 0 ci/scripts/compare_results.py | 4 +- exachem/cc/ccsd/cd_ccsd_cs_ann.cpp | 3 +- exachem/cc/ccsd_t/ccsd_t.cpp | 16 +- exachem/cc/scripts/ccsd_memory.py | 9 +- exachem/scf/scf_common.hpp | 14 +- exachem/scf/scf_guess.cpp | 38 +- exachem/scf/scf_guess.hpp | 4 +- exachem/scf/scf_iter.cpp | 4 +- exachem/scf/scf_main.cpp | 6 +- 13 files changed, 763 insertions(+), 59 deletions(-) create mode 100644 ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.6-31g.ccsd_t.json create mode 100644 ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.cc-pvdz.ccsd_t.json rename ci/reference_output/{ => ubiquitin_dgrtl}/ubiquitin_dgrtl.sto-3g.ccsd_t.json (100%) diff --git a/.github/workflows/c-cpp.yaml b/.github/workflows/c-cpp.yaml index 804f8a0..20fb7c4 100644 --- a/.github/workflows/c-cpp.yaml +++ b/.github/workflows/c-cpp.yaml @@ -19,7 +19,7 @@ jobs: matrix: os: - [self-hosted, ubuntu18] - - [self-hosted, macos] + # - [self-hosted, macos] backend: - ga - upcxx @@ -68,23 +68,23 @@ jobs: backend: ga use_cuda: no-cuda use_scalapack: scalapack - - os: [self-hosted, macos] - mpi_impl: openmpi - cxx: g++ - cc: gcc - fc: gfortran - backend: ga - use_cuda: no-cuda - use_scalapack: no-scalapack - exclude: - - os: [self-hosted, macos] - mpi_impl: openmpi - cxx: g++ - cc: gcc - fc: gfortran - backend: upcxx - use_cuda: no-cuda - use_scalapack: no-scalapack + # - os: [self-hosted, macos] + # mpi_impl: openmpi + # cxx: g++ + # cc: gcc + # fc: gfortran + # backend: ga + # use_cuda: no-cuda + # use_scalapack: no-scalapack + # exclude: + # - os: [self-hosted, macos] + # 
mpi_impl: openmpi + # cxx: g++ + # cc: gcc + # fc: gfortran + # backend: upcxx + # use_cuda: no-cuda + # use_scalapack: no-scalapack fail-fast: true env: @@ -102,12 +102,12 @@ jobs: echo "simd=$(${GITHUB_WORKSPACE}/.github/workflows/scripts/check_simd.sh)" >> $GITHUB_ENV shell: bash - - name: Set cache path mac - if: ${{ matrix.os[1] == 'macos' }} - id: set-cache-path-mac - run: | - echo "exachem_cache_path=$HOME/software/CI/cache/exachem_cache" >> $GITHUB_ENV - shell: bash + # - name: Set cache path mac + # if: ${{ matrix.os[1] == 'macos' }} + # id: set-cache-path-mac + # run: | + # echo "exachem_cache_path=$HOME/software/CI/cache/exachem_cache" >> $GITHUB_ENV + # shell: bash - name: Set cache path linux if: ${{ matrix.os[1] == 'ubuntu18' }} @@ -145,13 +145,13 @@ jobs: echo "CC=$(which clang)" >> $GITHUB_ENV echo "CXX=$(which clang++)" >> $GITHUB_ENV - - name: macos env - if: ${{ matrix.os[1] == 'macos' }} - shell: bash - run: | - echo "CC=gcc-12" >> $GITHUB_ENV - echo "CXX=g++-12" >> $GITHUB_ENV - echo "EC_NPROC=2" >> $GITHUB_ENV + # - name: macos env + # if: ${{ matrix.os[1] == 'macos' }} + # shell: bash + # run: | + # echo "CC=gcc-13" >> $GITHUB_ENV + # echo "CXX=g++-13" >> $GITHUB_ENV + # echo "EC_NPROC=2" >> $GITHUB_ENV - name: Cache install steps (backend = ga) if: ${{ matrix.backend == 'ga' }} diff --git a/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.6-31g.ccsd_t.json b/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.6-31g.ccsd_t.json new file mode 100644 index 0000000..3e6500d --- /dev/null +++ b/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.6-31g.ccsd_t.json @@ -0,0 +1,335 @@ +{ + "output": { + "SCF": { + "nucl_rep_energy": 4475.262668100429, + "iter": { + "1": { + "energy": -1889.1269824644223, + "e_diff": -1889.1269824644223, + "rmsd": 0.010728802301404114, + "performance": { + "total_time": 3.177378299 + } + }, + "2": { + "energy": -1889.7673476552536, + "e_diff": -0.6403651908312895, + "rmsd": 0.005419096670553817, + "performance": { 
+ "total_time": 4.276374071 + } + }, + "3": { + "energy": -1890.2150601313979, + "e_diff": -0.447712476144261, + "rmsd": 0.0009357654981807088, + "performance": { + "total_time": 3.17836137 + } + }, + "4": { + "energy": -1890.233016963255, + "e_diff": -0.01795683185719099, + "rmsd": 0.0003966128034843691, + "performance": { + "total_time": 3.178639027 + } + }, + "5": { + "energy": -1890.2358705578245, + "e_diff": -0.0028535945693874964, + "rmsd": 0.00021643765782968012, + "performance": { + "total_time": 3.183668211 + } + }, + "6": { + "energy": -1890.2363083568416, + "e_diff": -0.0004377990171633428, + "rmsd": 9.877430202775358e-05, + "performance": { + "total_time": 3.194116586 + } + }, + "7": { + "energy": -1890.236406541464, + "e_diff": -9.81846224021865e-05, + "rmsd": 4.3573252561953276e-05, + "performance": { + "total_time": 4.067460107 + } + }, + "8": { + "energy": -1890.2364234545967, + "e_diff": -1.6913132640183903e-05, + "rmsd": 1.1873340501180779e-05, + "performance": { + "total_time": 3.20327301 + } + }, + "9": { + "energy": -1890.236425345296, + "e_diff": -1.8906994228018448e-06, + "rmsd": 3.037964214545139e-06, + "performance": { + "total_time": 3.21347897 + } + }, + "10": { + "energy": -1890.2364255258472, + "e_diff": -1.8055106920655817e-07, + "rmsd": 1.052504721685541e-06, + "performance": { + "total_time": 3.219198324 + } + }, + "11": { + "energy": -1890.2364255489829, + "e_diff": -2.313572622369975e-08, + "rmsd": 3.8503890224021775e-07, + "performance": { + "total_time": 3.252110085 + } + }, + "12": { + "energy": -1890.2364255518487, + "e_diff": -2.865817805286497e-09, + "rmsd": 1.5783756224422576e-07, + "performance": { + "total_time": 3.254284735 + } + }, + "13": { + "energy": -1890.2364255522662, + "e_diff": -4.1745806811377406e-10, + "rmsd": 7.89610228331105e-08, + "performance": { + "total_time": 3.394743458 + } + } + }, + "final_energy": -1890.2364255522662, + "n_iterations": 13 + }, + "CD": { + "n_cholesky_vectors": 2333 + }, + "CCSD": { + 
"iter": { + "1": { + "residual": 1.5488198959225974, + "correlation": 0.0, + "performance": { + "total_time": 44.94849049 + } + }, + "2": { + "residual": 0.23427161971896038, + "correlation": -3.934636130333998, + "performance": { + "total_time": 44.982068407 + } + }, + "3": { + "residual": 0.09903709709060907, + "correlation": -3.9733160228360744, + "performance": { + "total_time": 44.67715573 + } + }, + "4": { + "residual": 0.03443674230573133, + "correlation": -4.1178619999382935, + "performance": { + "total_time": 44.679130305 + } + }, + "5": { + "residual": 0.019017381278698033, + "correlation": -4.130616794921958, + "performance": { + "total_time": 44.756815023 + } + }, + "6": { + "residual": 0.007587843458531064, + "correlation": -4.158098986994866, + "performance": { + "total_time": 44.90705685 + } + }, + "7": { + "residual": 0.0032780937380476024, + "correlation": -4.157488431243483, + "performance": { + "total_time": 45.131219903 + } + }, + "8": { + "residual": 0.0017613395238831315, + "correlation": -4.158510265724001, + "performance": { + "total_time": 45.142113822 + } + }, + "9": { + "residual": 0.0008887171522823565, + "correlation": -4.158660845561743, + "performance": { + "total_time": 44.984021118 + } + }, + "10": { + "residual": 0.0005630589397208675, + "correlation": -4.158901712203653, + "performance": { + "total_time": 45.03268134 + } + }, + "11": { + "residual": 0.00014142775499818093, + "correlation": -4.1591007485491245, + "performance": { + "total_time": 44.754019457 + } + }, + "12": { + "residual": 7.26220378666209e-05, + "correlation": -4.15922694691235, + "performance": { + "total_time": 44.994923674 + } + }, + "13": { + "residual": 4.2991464230325455e-05, + "correlation": -4.159232883609643, + "performance": { + "total_time": 44.896790956 + } + }, + "14": { + "residual": 2.977431902474512e-05, + "correlation": -4.159234655046466, + "performance": { + "total_time": 45.015552456 + } + }, + "15": { + "residual": 2.033836539555351e-05, + 
"correlation": -4.159235673418723, + "performance": { + "total_time": 44.95478251 + } + }, + "16": { + "residual": 6.688990054570346e-06, + "correlation": -4.159234301479996, + "performance": { + "total_time": 45.092610493 + } + }, + "17": { + "residual": 3.5876508094871872e-06, + "correlation": -4.159236669318126, + "performance": { + "total_time": 44.861878577 + } + }, + "18": { + "residual": 2.5370261559674257e-06, + "correlation": -4.159236540088763, + "performance": { + "total_time": 44.954944368 + } + }, + "19": { + "residual": 1.9674682877261343e-06, + "correlation": -4.159236706576976, + "performance": { + "total_time": 44.942382646 + } + }, + "20": { + "residual": 1.664791250643561e-06, + "correlation": -4.159236756119696, + "performance": { + "total_time": 44.765442837 + } + }, + "21": { + "residual": 4.857070286393798e-07, + "correlation": -4.159236702338797, + "performance": { + "total_time": 44.815795435 + } + } + }, + "n_iterations": 21, + "final_energy": { + "correlation": -4.159236702338797, + "total": -1894.395662254605 + } + }, + "CCSD(T)": { + "[T]Energies": { + "correction": -0.14237243849502376, + "correlation": -4.30160914083382, + "total": -1894.5380346931 + }, + "(T)Energies": { + "correction": -0.129101425642491, + "correlation": -4.288338127981288, + "total": -1894.5247636802476 + }, + "performance": { + "total_time": 864.447849803, + "gflops": 234747.85139673293, + "total_num_ops": 2.0292727538577997e+17, + "load_imbalance": 0.007057085792844808 + } + } + }, + "input": { + "molecule": { + "name": "ubiquitin_dgrtl", + "basisset": "6-31g", + "geometry_units": "angstrom" + }, + "SCF": { + "tol_int": 1e-22, + "tol_sch": 1e-16, + "tol_lindep": 1e-05, + "conve": 1e-08, + "convd": 1e-07, + "diis_hist": 12, + "AO_tilesize": 30, + "force_tilesize": "false", + "scf_type": "restricted", + "multiplicity": 1, + "lambdas": [], + "polvecs": [], + "omegas": [], + "volumes": [] + }, + "CD": { + "diagtol": 1e-05, + "itilesize": 1000, + "write_cv": true, + 
"write_vcount": 5000, + "max_cvecs_factor": 12 + }, + "CCSD": { + "threshold": 1e-06, + "tilesize": 60, + "ndiis": 5, + "readt": "false", + "writet": "true", + "ccsd_maxiter": 50, + "balance_tiles": "true" + }, + "CCSD_T": { + "skip_ccsd": false, + "ccsdt_tilesize": 40 + } + } +} diff --git a/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.cc-pvdz.ccsd_t.json b/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.cc-pvdz.ccsd_t.json new file mode 100644 index 0000000..0e2f040 --- /dev/null +++ b/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.cc-pvdz.ccsd_t.json @@ -0,0 +1,327 @@ +{ + "output": { + "SCF": { + "nucl_rep_energy": 4475.262668100429, + "iter": { + "1": { + "energy": -1890.133526752751, + "e_diff": -1890.133526752751, + "rmsd": 0.003927746634133651, + "performance": { + "total_time": 2.872608249 + } + }, + "2": { + "energy": -1890.879387294969, + "e_diff": -0.7458605422179971, + "rmsd": 0.001854395524193849, + "performance": { + "total_time": 4.459093129 + } + }, + "3": { + "energy": -1891.238247976712, + "e_diff": -0.3588606817429536, + "rmsd": 0.0004859720126242096, + "performance": { + "total_time": 2.891845127 + } + }, + "4": { + "energy": -1891.2626045803954, + "e_diff": -0.02435660368337267, + "rmsd": 0.0001476286366882083, + "performance": { + "total_time": 2.887151817 + } + }, + "5": { + "energy": -1891.265245074569, + "e_diff": -0.002640494173647312, + "rmsd": 6.73240586972727e-05, + "performance": { + "total_time": 2.894285365 + } + }, + "6": { + "energy": -1891.2655619529469, + "e_diff": -0.0003168783778164652, + "rmsd": 3.558015850678364e-05, + "performance": { + "total_time": 2.904470198 + } + }, + "7": { + "energy": -1891.265648151444, + "e_diff": -8.619849722890649e-05, + "rmsd": 2.0341880203883064e-05, + "performance": { + "total_time": 3.494201523 + } + }, + "8": { + "energy": -1891.2656679625097, + "e_diff": -1.9811065612884704e-05, + "rmsd": 5.881550034303751e-06, + "performance": { + "total_time": 2.955933218 + } + }, + "9": { 
+ "energy": -1891.2656701082396, + "e_diff": -2.14572992263129e-06, + "rmsd": 1.786400588009755e-06, + "performance": { + "total_time": 2.975357628 + } + }, + "10": { + "energy": -1891.2656703276061, + "e_diff": -2.193664840888232e-07, + "rmsd": 5.097813367373957e-07, + "performance": { + "total_time": 2.993834791 + } + }, + "11": { + "energy": -1891.2656703506555, + "e_diff": -2.304932422703132e-08, + "rmsd": 1.7931833269087904e-07, + "performance": { + "total_time": 3.033644468 + } + }, + "12": { + "energy": -1891.2656703537705, + "e_diff": -3.1150193535722792e-09, + "rmsd": 7.258200038407307e-08, + "performance": { + "total_time": 3.041630482 + } + } + }, + "final_energy": -1891.2656703537705, + "n_iterations": 12 + }, + "CD": { + "n_cholesky_vectors": 4680 + }, + "CCSD": { + "iter": { + "1": { + "residual": 2.0910266618184417, + "correlation": 0.0, + "performance": { + "total_time": 63.52386825 + } + }, + "2": { + "residual": 0.28242274727138184, + "correlation": -5.9477611752365895, + "performance": { + "total_time": 61.831035679 + } + }, + "3": { + "residual": 0.10543108268678836, + "correlation": -5.995972155423653, + "performance": { + "total_time": 61.942153552 + } + }, + "4": { + "residual": 0.031608788929061396, + "correlation": -6.16918228354707, + "performance": { + "total_time": 61.534701388 + } + }, + "5": { + "residual": 0.020147808361339253, + "correlation": -6.172350009009777, + "performance": { + "total_time": 61.648925318 + } + }, + "6": { + "residual": 0.008834211285473383, + "correlation": -6.196531643184589, + "performance": { + "total_time": 61.661445848 + } + }, + "7": { + "residual": 0.00403944981610847, + "correlation": -6.196136511460821, + "performance": { + "total_time": 61.588913306 + } + }, + "8": { + "residual": 0.002250969115668192, + "correlation": -6.197588262416118, + "performance": { + "total_time": 61.798171014 + } + }, + "9": { + "residual": 0.0011610842996448772, + "correlation": -6.1977623699158695, + "performance": { + 
"total_time": 61.737004882 + } + }, + "10": { + "residual": 0.0007840644341080898, + "correlation": -6.1981065654075955, + "performance": { + "total_time": 61.826872927 + } + }, + "11": { + "residual": 0.00018712195326376968, + "correlation": -6.198378857513728, + "performance": { + "total_time": 61.895499276 + } + }, + "12": { + "residual": 9.706906846199726e-05, + "correlation": -6.1985242780675875, + "performance": { + "total_time": 61.589857264 + } + }, + "13": { + "residual": 5.597477526973768e-05, + "correlation": -6.198511146829365, + "performance": { + "total_time": 61.904565654 + } + }, + "14": { + "residual": 3.724908240269413e-05, + "correlation": -6.198510174279947, + "performance": { + "total_time": 61.647209973 + } + }, + "15": { + "residual": 2.507383757349463e-05, + "correlation": -6.198507831936575, + "performance": { + "total_time": 62.167398702 + } + }, + "16": { + "residual": 8.943918667675381e-06, + "correlation": -6.198500390028496, + "performance": { + "total_time": 61.655400654 + } + }, + "17": { + "residual": 4.87173372135622e-06, + "correlation": -6.1985059166181555, + "performance": { + "total_time": 61.977968945 + } + }, + "18": { + "residual": 3.4134133967358956e-06, + "correlation": -6.1985066581172905, + "performance": { + "total_time": 61.648776652 + } + }, + "19": { + "residual": 2.5699709435494954e-06, + "correlation": -6.198507181994023, + "performance": { + "total_time": 61.391719638 + } + }, + "20": { + "residual": 2.164902706609658e-06, + "correlation": -6.198507386098635, + "performance": { + "total_time": 62.260367295 + } + }, + "21": { + "residual": 5.677698534817414e-07, + "correlation": -6.198507559551879, + "performance": { + "total_time": 61.474960151 + } + } + }, + "n_iterations": 21, + "final_energy": { + "correlation": -6.198507559551879, + "total": -1897.4641779133224 + } + }, + "CCSD(T)": { + "[T]Energies": { + "correction": -0.23012808673214305, + "correlation": -6.428635646284022, + "total": -1897.6943060000544 + 
}, + "(T)Energies": { + "correction": -0.21649152302506627, + "correlation": -6.414999082576945, + "total": -1897.6806694363474 + }, + "performance": { + "total_time": 825.457046853, + "gflops": 3741086.683164038, + "total_num_ops": 3.088106365505672e+18, + "load_imbalance": 0.01347777251503035 + } + } + }, + "input": { + "molecule": { + "name": "ubiquitin_dgrtl", + "basisset": "cc-pvdz", + "geometry_units": "angstrom" + }, + "SCF": { + "tol_int": 1e-22, + "tol_sch": 1e-16, + "tol_lindep": 1e-05, + "conve": 1e-08, + "convd": 1e-07, + "diis_hist": 12, + "AO_tilesize": 37, + "force_tilesize": "false", + "scf_type": "restricted", + "multiplicity": 1, + "lambdas": [], + "polvecs": [], + "omegas": [], + "volumes": [] + }, + "CD": { + "diagtol": 1e-05, + "itilesize": 1000, + "write_cv": true, + "write_vcount": 5000, + "max_cvecs_factor": 12 + }, + "CCSD": { + "threshold": 1e-06, + "tilesize": 60, + "ndiis": 5, + "readt": "false", + "writet": "true", + "ccsd_maxiter": 50, + "balance_tiles": "true" + }, + "CCSD_T": { + "skip_ccsd": false, + "ccsdt_tilesize": 40 + } + } +} diff --git a/ci/reference_output/ubiquitin_dgrtl.sto-3g.ccsd_t.json b/ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.sto-3g.ccsd_t.json similarity index 100% rename from ci/reference_output/ubiquitin_dgrtl.sto-3g.ccsd_t.json rename to ci/reference_output/ubiquitin_dgrtl/ubiquitin_dgrtl.sto-3g.ccsd_t.json diff --git a/ci/scripts/compare_results.py b/ci/scripts/compare_results.py index fa1cfe7..8f081a5 100644 --- a/ci/scripts/compare_results.py +++ b/ci/scripts/compare_results.py @@ -43,7 +43,7 @@ def isclose(a, b, rel_tol=1e-09, abs_tol=0): ref_files = os.listdir(ref_res_path) cur_files = os.listdir(cur_res_path) -ref_notreq = ["ubiquitin_dgrtl.sto-3g.ccsd.json","uracil.cc-pvdz.ccsd_t.json"] +ref_notreq = ["ubiquitin_dgrtl","uracil.cc-pvdz.ccsd_t.json"] for rf in ref_notreq: if rf not in cur_files: ref_files.remove(rf) @@ -246,4 +246,4 @@ def check_results(ref_energy,cur_energy,ccsd_threshold,en_str): 
missing_tests = [os.path.splitext(x)[0] for x in missing_tests] print(" ************ The following tests failed ************** ") print("\n".join(missing_tests)) - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp b/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp index cd85e3f..524d7bc 100644 --- a/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp +++ b/exachem/cc/ccsd/cd_ccsd_cs_ann.cpp @@ -390,10 +390,9 @@ void ccsd_t2_cs(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace // A*B { +#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP) TensorElType2* abuf_dev{nullptr}; TensorElType3* bbuf_dev{nullptr}; - -#if defined(USE_CUDA) || defined(USE_HIP) || defined(USE_DPCPP) if(hw == ExecutionHW::GPU) { abuf_dev = static_cast(memDevicePool.allocate(asize * sizeof(TensorElType2))); diff --git a/exachem/cc/ccsd_t/ccsd_t.cpp b/exachem/cc/ccsd_t/ccsd_t.cpp index c76afa5..b9704e5 100644 --- a/exachem/cc/ccsd_t/ccsd_t.cpp +++ b/exachem/cc/ccsd_t/ccsd_t.cpp @@ -73,8 +73,6 @@ void ccsd_t_driver(std::string filename, OptionsMap options_map) { Scheduler sub_sch{*sub_ec}; - // force writet on - ccsd_options.writet = true; ccsd_options.computeTData = true; auto debug = ccsd_options.debug; @@ -455,9 +453,19 @@ void ccsd_t_driver(std::string filename, OptionsMap options_map) { tamm::reset_rmm_pool(); // tamm::reinitialize_rmm_pool(); + std::string dev_str = "[CPU]"; +#if defined(USE_CUDA) + dev_str = "[Nvidia GPU]"; +#elif defined(USE_HIP) + dev_str = "[AMD GPU]"; +#elif defined(USE_DPCPP) + dev_str = "[Intel GPU]"; +#endif + if(rank == 0) { - if(is_restricted) cout << endl << "Running Closed Shell CCSD(T) calculation" << endl; - else cout << endl << "Running Open Shell CCSD(T) calculation" << endl; + if(is_restricted) + cout << endl << dev_str << " Running Closed Shell CCSD(T) calculation" << endl; + else cout << endl << dev_str << " Running Open Shell CCSD(T) calculation" << endl; } bool seq_h3b = true; diff --git 
a/exachem/cc/scripts/ccsd_memory.py b/exachem/cc/scripts/ccsd_memory.py index c21eafa..2da1daf 100644 --- a/exachem/cc/scripts/ccsd_memory.py +++ b/exachem/cc/scripts/ccsd_memory.py @@ -57,6 +57,8 @@ def parseargs(argv=None): V = v_alpha + v_beta O = o_alpha + o_beta +nbf=o_alpha + v_alpha + ccsd_mem = 0 #setup tensors @@ -115,6 +117,9 @@ def parseargs(argv=None): #cv3d {MO,MO,CI} cv3d = CI*d_f1 +#(MOxMOxCI + MOxAOxCI) +chol_mem = CI*(d_f1+ 2*nbf*nbf) + #chol3d_oo, chol3d_ov, chol3d_vv chol3d_oo = CI*f1_oo #{O, O, CI}, {"aa", "bb"} chol3d_ov = CI*f1_ov #{O, V, CI}, {"aa", "bb"} @@ -172,6 +177,8 @@ def parseargs(argv=None): gib=1024*1024*1024.0 ccsd_mem = round(ccsd_mem*8/gib,2) #bytes +chol_mem = round(chol_mem*8/gib,2) #bytes +print("Total CPU memory required for Cholesky decomp of the 2e integrals: " + str(chol_mem) + " GiB") print("Total CPU memory required for CCSD calculation: " + str(ccsd_mem) + " GiB") v4_mem = v_alpha*v_alpha*v_beta*v_beta @@ -291,4 +298,4 @@ def ft_mem(i,j,k,l) : t_gpu_mem=9*(T*T + T*T*T*T + 2*2*nbf*T*T*T) * 8 t_gpu_mem=str(round(t_gpu_mem/gib,2)) -print("(T): memory required on a single gpu = " + t_gpu_mem + " GiB") \ No newline at end of file +print("(T): memory required on a single gpu = " + t_gpu_mem + " GiB") diff --git a/exachem/scf/scf_common.hpp b/exachem/scf/scf_common.hpp index 73adda1..1a359b8 100644 --- a/exachem/scf/scf_common.hpp +++ b/exachem/scf/scf_common.hpp @@ -38,12 +38,14 @@ using shellpair_data_t = struct SCFVars { // diis - int idiis = 0; - bool switch_diis = false; - double exc = 0.0; - double eqed = 0.0; - bool do_dens_fit = false; - bool do_load_bal = false; + int idiis = 0; + bool switch_diis = false; + double exc = 0.0; + double eqed = 0.0; + bool do_dens_fit = false; + bool do_load_bal = false; + bool lshift_reset = false; + double lshift = 0; /* level-shift value; fractional (reset to 0.5), so it must not be bool */ libecpint::ECPIntegrator ecp_factory; // AO spaces diff --git a/exachem/scf/scf_guess.cpp b/exachem/scf/scf_guess.cpp index f5a96d4..7bd61fd 100644 --- 
a/exachem/scf/scf_guess.cpp +++ b/exachem/scf/scf_guess.cpp @@ -581,8 +581,8 @@ void compute_pchg_ints(ExecutionContext& ec, const SCFVars& scf_vars, Tensor -void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& scalapack_info, - TAMMTensors& ttensors, EigenTensors& etensors) { +void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, SCFVars& scf_vars, + ScalapackInfo& scalapack_info, TAMMTensors& ttensors, EigenTensors& etensors) { auto rank = sch.ec().pg().rank(); // const bool debug = sys_data.options_map.scf_options.debug && rank==0; @@ -598,6 +598,9 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& const int64_t N = sys_data.nbf_orig; const bool is_uhf = sys_data.is_unrestricted; // const bool is_rhf = sys_data.is_restricted; + const int nelectrons_alpha = sys_data.nelectrons_alpha; + const int nelectrons_beta = sys_data.nelectrons_beta; + double hl_gap = 0; #if defined(USE_SCALAPACK) @@ -686,11 +689,11 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& 1, 1, TMP1_sca.desc(), 0., Fp_sca.data(), 1, 1, Fp_sca.desc()); // Solve EVP - std::vector eps_a(Northo); + std::vector eps_b(Northo); // scalapackpp::hereigd( scalapackpp::Job::Vec, scalapackpp::Uplo::Lower, - // Fp_sca, eps_a.data(), Ca_sca ); + // Fp_sca, eps_b.data(), Ca_sca ); /*info=*/scalapackpp::hereig(scalapackpp::Job::Vec, scalapackpp::Uplo::Lower, Fp_sca.m(), - Fp_sca.data(), 1, 1, Fp_sca.desc(), eps_a.data(), + Fp_sca.data(), 1, 1, Fp_sca.desc(), eps_b.data(), Ca_sca.data(), 1, 1, Ca_sca.desc()); // Backtransform TMP = X * Cb -> TMP**T = Cb**T * X @@ -703,6 +706,10 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& // Gather results // if(scalapack_info.pg.rank() == 0) C_beta.resize(N, Northo); // TMP2_sca.gather_from(Northo, N, C_beta.data(), Northo, 0, 0); + + if(!scf_vars.lshift_reset) + hl_gap = std::min(eps_a[nelectrons_alpha], eps_b[nelectrons_beta]) - + 
std::max(eps_a[nelectrons_alpha - 1], eps_b[nelectrons_beta - 1]); } } // rank participates in ScaLAPACK call @@ -715,7 +722,8 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& const int64_t Northo_a = sys_data.nbf; // X_a.cols(); // TODO: avoid eigen Fp - Matrix X_a; + Matrix X_a; + std::vector eps_a; if(rank == 0) { // alpha Matrix Fp = tamm_to_eigen_matrix(ttensors.F_alpha); @@ -725,7 +733,7 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& Fp.data(), N, X_a.data(), Northo_a, 0., C_alpha.data(), N); blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, Northo_a, Northo_a, N, 1., X_a.data(), Northo_a, C_alpha.data(), N, 0., Fp.data(), Northo_a); - std::vector eps_a(Northo_a); + eps_a.resize(Northo_a); lapack::syevd(lapack::Job::Vec, lapack::Uplo::Lower, Northo_a, Fp.data(), Northo_a, eps_a.data()); blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans, Northo_a, N, Northo_a, @@ -748,9 +756,23 @@ void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& eps_b.data()); blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans, Northo_b, N, Northo_b, 1., Fp.data(), Northo_b, X_b.data(), Northo_b, 0., C_beta.data(), Northo_b); + + if(!scf_vars.lshift_reset) { + hl_gap = std::min(eps_a[nelectrons_alpha], eps_b[nelectrons_beta]) - + std::max(eps_a[nelectrons_alpha - 1], eps_b[nelectrons_beta - 1]); + } } + if(!scf_vars.lshift_reset) sch.ec().pg().broadcast(&hl_gap, 1, 0); } #endif + + if(!scf_vars.lshift_reset && is_uhf) { + if(hl_gap < 1e-2) { + scf_vars.lshift_reset = true; + scf_vars.lshift = 0.5; + if(rank == 0) cout << "Resetting lshift to 0.5" << endl; + } + } } template void scf_guess::subshell_occvec(double& occvec, size_t size, size_t& ne); @@ -778,6 +800,6 @@ template void compute_ecp_ints(ExecutionContext& ec, const SCFVars& scf_vars, std::vector& shells, std::vector& ecps); -template void scf_diagonalize(Scheduler& sch, 
const SystemData& sys_data, +template void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, SCFVars& scf_vars, ScalapackInfo& scalapack_info, TAMMTensors& ttensors, EigenTensors& etensors); diff --git a/exachem/scf/scf_guess.hpp b/exachem/scf/scf_guess.hpp index c43327c..931fb69 100644 --- a/exachem/scf/scf_guess.hpp +++ b/exachem/scf/scf_guess.hpp @@ -59,8 +59,8 @@ void compute_ecp_ints(ExecutionContext& ec, const SCFVars& scf_vars, Tensor& ecps); template -void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, ScalapackInfo& scalapack_info, - TAMMTensors& ttensors, EigenTensors& etensors); +void scf_diagonalize(Scheduler& sch, const SystemData& sys_data, SCFVars& scf_vars, + ScalapackInfo& scalapack_info, TAMMTensors& ttensors, EigenTensors& etensors); template void compute_sad_guess(ExecutionContext& ec, ScalapackInfo& scalapack_info, SystemData& sys_data, diff --git a/exachem/scf/scf_iter.cpp b/exachem/scf/scf_iter.cpp index 3f49ee2..c56f8f7 100644 --- a/exachem/scf/scf_iter.cpp +++ b/exachem/scf/scf_iter.cpp @@ -20,7 +20,7 @@ std::tuple scf_iter_body(ExecutionContext& ec, const bool is_uhf = sys_data.is_unrestricted; const bool is_rhf = sys_data.is_restricted; - const double lshift = sys_data.options_map.scf_options.lshift; + const double lshift = scf_vars.lshift; Tensor& H1 = ttensors.H1; Tensor& S1 = ttensors.S1; @@ -181,7 +181,7 @@ std::tuple scf_iter_body(ExecutionContext& ec, auto do_t1 = std::chrono::high_resolution_clock::now(); - scf_diagonalize(sch, sys_data, scalapack_info, ttensors, etensors); + scf_diagonalize(sch, sys_data, scf_vars, scalapack_info, ttensors, etensors); auto do_t2 = std::chrono::high_resolution_clock::now(); auto do_time = diff --git a/exachem/scf/scf_main.cpp b/exachem/scf/scf_main.cpp index d71d8d2..149316d 100644 --- a/exachem/scf/scf_main.cpp +++ b/exachem/scf/scf_main.cpp @@ -247,6 +247,7 @@ hartree_fock(ExecutionContext& exc, const string filename, OptionsMap options_ma } SCFVars scf_vars; // init vars + 
scf_vars.lshift = sys_data.options_map.scf_options.lshift; if(rank == 0) { const double fock_precision = std::min(scf_options.tol_sch, 1e-2 * scf_options.conve); @@ -755,7 +756,7 @@ hartree_fock(ExecutionContext& exc, const string filename, OptionsMap options_ma shell2bf, SchwarzK, max_nprim4, ttensors, etensors, is_3c_init, do_density_fitting, 1.0); - scf_diagonalize(sch, sys_data, scalapack_info, ttensors, etensors); + scf_diagonalize(sch, sys_data, scf_vars, scalapack_info, ttensors, etensors); compute_density(ec, sys_data, scf_vars, scalapack_info, ttensors, etensors); @@ -953,6 +954,9 @@ hartree_fock(ExecutionContext& exc, const string filename, OptionsMap options_ma if(debug) print_energies(ec, ttensors, etensors, sys_data, scf_vars, scalapack_info, debug); + // Reset lshift to input option. + if(fabs(ediff) > 1e-2) scf_vars.lshift = sys_data.options_map.scf_options.lshift; + } while((fabs(ediff) > conve) || (fabs(rmsd) > convd)); // SCF main loop if(rank == 0) {