Fix run caly model devi (#197)

wangzyphysics · web-flow · commit b81a1956f6fc · 2024-03-12T17:19:51.000+08:00
CALYPSO proposes structures with different chemical formulas when
running in `VSC` mode. However, dpdata cannot parse a dump file whose
frames have different chemical formulas. To resolve this issue, frames
are separated into different dump files based on the number of atoms in
each frame.
diff --git a/dpgen2/op/collect_run_caly.py b/dpgen2/op/collect_run_caly.py
@@ -69,6 +69,7 @@ def get_input_sign(cls):
                 "opt_results_dir": Artifact(
                     type=Path, optional=True
                 ),  # dir contains POSCAR* CONTCAR* OUTCAR*
+                "qhull_input": Artifact(type=Path, optional=True),  # for vsc
             }
         )
 
@@ -82,6 +83,7 @@ def get_output_sign(cls):
                 "input_file": Artifact(Path),  # input.dat
                 "results": Artifact(Path),  # calypso generated results
                 "step": Artifact(Path),  # step
+                "qhull_input": Artifact(Path),
             }
         )
 
@@ -104,6 +106,7 @@ def execute(
             - `step`: (`Path`) The step file from last calypso run
             - `results`: (`Path`) The results dir from last calypso run
             - `opt_results_dir`: (`Path`) The results dir contains POSCAR* CONTCAR* OUTCAR* from last calypso run
+            - `qhull_input`: (`Path`) qhull input file `test_qconvex.in`
 
         Returns
         -------
@@ -115,6 +118,7 @@ def execute(
             - `input_file`: (`Path`) The input file of the task (input.dat).
             - `step`: (`Path`) The step file.
             - `results`: (`Path`) The results dir.
+            - `qhull_input`: (`Path`) qhull input file.
 
         Raises
         ------
@@ -129,7 +133,7 @@ def execute(
         # input.dat
         _input_file = ip["input_file"]
         input_file = _input_file.resolve()
-        max_step = get_max_step(input_file)
+        max_step, vsc = get_value_from_inputdat(input_file)
         # work_dir name: calypso_task.idx
         work_dir = Path(ip["task_name"])
 
@@ -142,10 +146,15 @@ def execute(
             if ip["opt_results_dir"] is not None
             else ip["opt_results_dir"]
         )
+        qhull_input = (
+            ip["qhull_input"].resolve()
+            if ip["qhull_input"] is not None
+            else ip["qhull_input"]
+        )
 
         with set_directory(work_dir):
             # prep files/dirs from last calypso run
-            prep_last_calypso_file(step, results, opt_results_dir)
+            prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc)
             # copy input.dat
             Path(input_file.name).symlink_to(input_file)
             # run calypso
@@ -177,21 +186,19 @@ def execute(
 
             step = Path("step").read_text().strip()
             finished = "true" if int(cnt_num) == int(max_step) else "false"
-            # poscar_dir = "poscar_dir_none" if not finished else poscar_dir
-            # fake_traj = Path("traj_results_dir")
-            # fake_traj.mkdir(parents=True, exist_ok=True)
+
+            if not Path("test_qconvex.in").exists():
+                Path("test_qconvex.in").write_text("")
 
         ret_dict = {
             "task_name": str(work_dir),
             "finished": finished,
             "poscar_dir": work_dir.joinpath(poscar_dir),
-            # "input_file": ip["input_file"],
             "input_file": _input_file,
             "step": work_dir.joinpath("step"),
             "results": work_dir.joinpath("results"),
-            # "fake_traj_results_dir": work_dir.joinpath(fake_traj),
+            "qhull_input": work_dir.joinpath("test_qconvex.in"),
         }
-
         return OPIO(ret_dict)
 
     @staticmethod
@@ -219,19 +226,28 @@ def normalize_config(data={}):
 config_args = CollRunCaly.calypso_args
 
 
-def prep_last_calypso_file(step, results, opt_results_dir):
+def prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc):
     if step is not None and results is not None or opt_results_dir is not None:
         Path(step.name).symlink_to(step)
         Path(results.name).symlink_to(results)
         for file_name in opt_results_dir.iterdir():
             Path(file_name.name).symlink_to(file_name)
 
+    if vsc and qhull_input is not None:
+        Path(qhull_input.name).symlink_to(qhull_input)
+
 
-def get_max_step(filename):
+def get_value_from_inputdat(filename):
+    max_step = 0
+    vsc = False
     with open(filename, "r") as f:
         lines = f.readlines()
         for line in lines:
             if "MaxStep" in line:
                 max_step = int(line.strip().split("#")[0].split("=")[1])
-                return max_step
-        raise ValueError(f"Key 'MaxStep' missed in {str(filename)}")
+                continue
+            if "VSC" in line:
+                vsc_str = line.strip().split("#")[0].split("=")[1].lower().strip()
+                if vsc_str.startswith("t"):
+                    vsc = True
+        return max_step, vsc
diff --git a/dpgen2/op/run_caly_model_devi.py b/dpgen2/op/run_caly_model_devi.py
@@ -1,3 +1,6 @@
+from collections import (
+    defaultdict,
+)
 from pathlib import (
     Path,
 )
@@ -45,8 +48,8 @@ def get_output_sign(cls):
         return OPIOSign(
             {
                 "task_name": Parameter(str),
-                "traj": Artifact(Path),
-                "model_devi": Artifact(Path),
+                "traj": Artifact(List[Path]),
+                "model_devi": Artifact(List[Path]),
             }
         )
 
@@ -71,8 +74,8 @@ def execute(
         Any
             Output dict with components:
             - `task_name`: (`str`) The name of task.
-            - `traj`: (`Artifact(Path)`) The output trajectory.
-            - `model_devi`: (`Artifact(Path)`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
+            - `traj`: (`Artifact(List[Path])`) The output trajectory.
+            - `model_devi`: (`Artifact(List[Path])`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
 
         """
 
@@ -92,45 +95,70 @@ def execute(
         traj_dirs = ip["traj_dirs"]
         traj_dirs = [traj_dir.resolve() for traj_dir in traj_dirs]
 
-        dump_file_name = "traj.dump"
-        model_devi_file_name = "model_devi.out"
+        dump_file_name = "traj.%d.dump"
+        model_devi_file_name = "model_devi.%d.out"
 
-        Devis = []
         tcount = 0
         with set_directory(work_dir):
-            dump_file = Path().joinpath(dump_file_name)
-            model_devi_file = Path().joinpath(model_devi_file_name)
-            f = open(dump_file, "a")
+            dump_str_dict = defaultdict(list)  # key: natoms, value: dump_strs
+            devis_dict = defaultdict(list)  # key: natoms, value: Devis-s
             for traj_dir in traj_dirs:
                 for traj_name in traj_dir.rglob("*.traj"):
                     atoms_list = parse_traj(traj_name)
                     if atoms_list is None:
                         continue
                     for atoms in atoms_list:
-                        dump_str = atoms2lmpdump(atoms, tcount, type_map)
-                        f.write(dump_str)
+                        natoms = len(atoms)
+                        dump_str = atoms2lmpdump(atoms, tcount, type_map, ignore=True)
+                        dump_str_dict[natoms].append(dump_str)
+
                         pbc = np.all(atoms.get_pbc())
                         coord = atoms.get_positions().reshape(1, -1)
                         cell = atoms.get_cell().array.reshape(1, -1) if pbc else None
                         atype = [type_map.index(atom.symbol) for atom in atoms]  # type: ignore
                         devi = calc_model_devi(coord, cell, atype, graphs)[0]
-                        devi[0] = tcount
-                        Devis.append(devi)
+                        devis_dict[natoms].append(devi)
                         tcount += 1
-            f.close()
-            Devis = np.vstack(Devis)
-            write_model_devi_out(Devis, model_devi_file)
+
+            traj_file_list = []
+            model_devi_file_list = []
+            keys = dump_str_dict.keys()
+            for key in keys:
+                dump_file = Path().joinpath(dump_file_name % key)
+                model_devi_file = Path().joinpath(model_devi_file_name % key)
+
+                traj_str = dump_str_dict[key]
+                model_devis = devis_dict[key]
+                assert len(traj_str) == len(
+                    model_devis
+                ), "The length of traj_str and model_devis should be same."
+                for idx in range(len(model_devis)):
+                    traj_str[idx] = traj_str[idx] % idx
+                    model_devis[idx][0] = idx
+
+                traj_str = "".join(traj_str)
+                dump_file.write_text(traj_str)
+
+                model_devis = np.vstack(model_devis)
+                write_model_devi_out(model_devis, model_devi_file)
+
+                traj_file_list.append(dump_file)
+                model_devi_file_list.append(model_devi_file)
+
+        for idx in range(len(traj_file_list)):
+            traj_file_list[idx] = work_dir / traj_file_list[idx]
+            model_devi_file_list[idx] = work_dir / model_devi_file_list[idx]
 
         ret_dict = {
             "task_name": str(work_dir),
-            "traj": work_dir / dump_file,
-            "model_devi": work_dir / model_devi_file,
+            "traj": traj_file_list,
+            "model_devi": model_devi_file_list,
         }
 
         return OPIO(ret_dict)
 
 
-def atoms2lmpdump(atoms, struc_idx, type_map):
+def atoms2lmpdump(atoms, struc_idx, type_map, ignore=False):
     """down triangle cell can be obtained from
     cell params: a, b, c, alpha, beta, gamma.
     cell = cellpar_to_cell([a, b, c, alpha, beta, gamma])
@@ -154,7 +182,10 @@ def atoms2lmpdump(atoms, struc_idx, type_map):
     )
 
     dump_str = "ITEM: TIMESTEP\n"
-    dump_str += f"{struc_idx}\n"
+    if not ignore:
+        dump_str += f"{struc_idx}\n"
+    else:
+        dump_str += "%d\n"
     dump_str += "ITEM: NUMBER OF ATOMS\n"
     dump_str += f"{atoms.get_global_number_of_atoms()}\n"
 
diff --git a/dpgen2/superop/caly_evo_step.py b/dpgen2/superop/caly_evo_step.py
@@ -73,6 +73,7 @@ def __init__(
             "results": InputArtifact(optional=True),
             "step": InputArtifact(optional=True),
             "opt_results_dir": InputArtifact(optional=True),
+            "qhull_input": InputArtifact(optional=True),
         }
         self._output_parameters = {
             # "task_name": OutputParameter(),
@@ -177,6 +178,7 @@ def _caly_evo_step(
             "step": caly_evo_step_steps.inputs.artifacts["step"],
             "results": caly_evo_step_steps.inputs.artifacts["results"],
             "opt_results_dir": caly_evo_step_steps.inputs.artifacts["opt_results_dir"],
+            "qhull_input": caly_evo_step_steps.inputs.artifacts["qhull_input"],
         },
         key="%s--collect-run-calypso-%s-%s"
         % (
@@ -245,13 +247,14 @@ def _caly_evo_step(
             ],  # input.dat
             "results": collect_run_calypso.outputs.artifacts["results"],
             "step": collect_run_calypso.outputs.artifacts["step"],
+            "qhull_input": collect_run_calypso.outputs.artifacts["qhull_input"],
             "opt_results_dir": prep_run_dp_optim.outputs.artifacts["optim_results_dir"],
             "caly_run_opt_file": prep_run_dp_optim.outputs.artifacts[
                 "caly_run_opt_file"
-            ],  # input.dat
+            ],
             "caly_check_opt_file": prep_run_dp_optim.outputs.artifacts[
                 "caly_check_opt_file"
-            ],  # input.dat
+            ],
         },
         when="%s == false" % (collect_run_calypso.outputs.parameters["finished"]),
     )
diff --git a/dpgen2/superop/prep_run_calypso.py b/dpgen2/superop/prep_run_calypso.py
@@ -213,6 +213,7 @@ def _prep_run_caly(
             "results": temp_value,
             "step": temp_value,
             "opt_results_dir": temp_value,
+            "qhull_input": temp_value,
         },
         key=step_keys["caly-evo-step-{{item}}"],
         with_sequence=argo_sequence(
diff --git a/tests/mocked_ops.py b/tests/mocked_ops.py
@@ -969,6 +969,11 @@ def execute(
         work_dir = Path(ip["task_name"])
         work_dir.mkdir(exist_ok=True, parents=True)
 
+        qhull_input = (
+            ip["qhull_input"].resolve()
+            if ip["qhull_input"] is not None
+            else ip["qhull_input"]
+        )
         step = ip["step"].resolve() if ip["step"] is not None else ip["step"]
         results = (
             ip["results"].resolve() if ip["results"] is not None else ip["results"]
@@ -999,6 +1004,9 @@ def execute(
             step_num = Path("step").read_text().strip()
             Path("step").write_text(f"{int(step_num)+1}")
 
+        if qhull_input is None:
+            Path("test_qconvex.in").write_text("")
+
         step_num = int(Path("step").read_text().strip())
 
         if results is None:
@@ -1031,6 +1039,7 @@ def execute(
             "input_file": work_dir.joinpath(input_file.name),
             "results": work_dir.joinpath("results"),
             "step": work_dir.joinpath("step"),
+            "qhull_input": work_dir.joinpath("test_qconvex.in"),
         }
         return OPIO(ret_dict)
 
@@ -1157,7 +1166,7 @@ def execute(
         return OPIO(
             {
                 "task_name": str(work_dir),
-                "traj": work_dir / dump_file_name,
-                "model_devi": work_dir / model_devi_file_name,
+                "traj": [work_dir / dump_file_name],
+                "model_devi": [work_dir / model_devi_file_name],
             }
         )
diff --git a/tests/op/test_collect_run_caly.py b/tests/op/test_collect_run_caly.py
@@ -28,7 +28,7 @@
     calypso_input_file,
     calypso_log_name,
 )
-from dpgen2.op.collect_run_caly import CollRunCaly, get_max_step
+from dpgen2.op.collect_run_caly import CollRunCaly, get_value_from_inputdat
 from dpgen2.utils import (
     BinaryFileInput,
 )
@@ -44,7 +44,7 @@ def setUp(self):
         self.input_file_path = Path("input_file")
         self.input_file_path.mkdir(parents=True, exist_ok=True)
         self.input_file = self.input_file_path.joinpath(calypso_input_file)
-        self.input_file.write_text("input.dat\nMaxStep=3\n")
+        self.input_file.write_text("input.dat\nMaxStep=3\nVSC= T\n")
 
         self.step_file = self.input_file_path.joinpath("step")
         self.step_file.write_text("3")
@@ -69,12 +69,15 @@ def tearDown(self):
         shutil.rmtree(Path(self.task_name), ignore_errors=True)
 
     def test_get_max_step(self):
-        max_step = get_max_step(self.input_file)
+        max_step, vsc = get_value_from_inputdat(self.input_file)
         self.assertTrue(max_step == 3)
+        self.assertTrue(vsc == True)
 
         temp_input_file = self.input_file_path.joinpath("temp_input_dat")
         temp_input_file.write_text("input.dat\n")
-        self.assertRaises(ValueError, get_max_step, temp_input_file)
+        max_step, vsc = get_value_from_inputdat(temp_input_file)
+        self.assertTrue(max_step == 0)
+        self.assertTrue(vsc == False)
 
     @patch("dpgen2.op.collect_run_caly.run_command")
     def test_step_st_maxstep_01(self, mocked_run):
@@ -109,6 +112,7 @@ def side_effect(*args, **kwargs):
         self.assertEqual(out["input_file"], self.input_file)
         self.assertEqual(out["step"], Path(self.task_name) / "step")
         self.assertEqual(out["results"], Path(self.task_name) / "results")
+        self.assertEqual(out["qhull_input"], Path(self.task_name) / "test_qconvex.in")
         self.assertEqual(out["finished"], "false")
 
     @patch("dpgen2.op.collect_run_caly.run_command")
diff --git a/tests/op/test_run_caly_model_devi.py b/tests/op/test_run_caly_model_devi.py
diff --git a/tests/test_caly_evo_step.py b/tests/test_caly_evo_step.py