Skip to content

Commit b81a195

Browse files
Fix run caly model devi (#197)
CALYPSO will propose structures with different chemical formulas when using `VSC` mode. However, dpdata cannot parse a dump file containing frames with different chemical formulas. To resolve this issue, frames are separated into different dump files based on the number of atoms in each frame.
1 parent 00d3f5e commit b81a195

File tree

8 files changed

+156
-50
lines changed

8 files changed

+156
-50
lines changed

dpgen2/op/collect_run_caly.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def get_input_sign(cls):
6969
"opt_results_dir": Artifact(
7070
type=Path, optional=True
7171
), # dir contains POSCAR* CONTCAR* OUTCAR*
72+
"qhull_input": Artifact(type=Path, optional=True), # for vsc
7273
}
7374
)
7475

@@ -82,6 +83,7 @@ def get_output_sign(cls):
8283
"input_file": Artifact(Path), # input.dat
8384
"results": Artifact(Path), # calypso generated results
8485
"step": Artifact(Path), # step
86+
"qhull_input": Artifact(Path),
8587
}
8688
)
8789

@@ -104,6 +106,7 @@ def execute(
104106
- `step`: (`Path`) The step file from last calypso run
105107
- `results`: (`Path`) The results dir from last calypso run
106108
- `opt_results_dir`: (`Path`) The results dir contains POSCAR* CONTCAR* OUTCAR* from last calypso run
109+
- `qhull_input`: (`Path`) qhull input file `test_qconvex.in`
107110
108111
Returns
109112
-------
@@ -115,6 +118,7 @@ def execute(
115118
- `input_file`: (`Path`) The input file of the task (input.dat).
116119
- `step`: (`Path`) The step file.
117120
- `results`: (`Path`) The results dir.
121+
- `qhull_input`: (`Path`) qhull input file.
118122
119123
Raises
120124
------
@@ -129,7 +133,7 @@ def execute(
129133
# input.dat
130134
_input_file = ip["input_file"]
131135
input_file = _input_file.resolve()
132-
max_step = get_max_step(input_file)
136+
max_step, vsc = get_value_from_inputdat(input_file)
133137
# work_dir name: calypso_task.idx
134138
work_dir = Path(ip["task_name"])
135139

@@ -142,10 +146,15 @@ def execute(
142146
if ip["opt_results_dir"] is not None
143147
else ip["opt_results_dir"]
144148
)
149+
qhull_input = (
150+
ip["qhull_input"].resolve()
151+
if ip["qhull_input"] is not None
152+
else ip["qhull_input"]
153+
)
145154

146155
with set_directory(work_dir):
147156
# prep files/dirs from last calypso run
148-
prep_last_calypso_file(step, results, opt_results_dir)
157+
prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc)
149158
# copy input.dat
150159
Path(input_file.name).symlink_to(input_file)
151160
# run calypso
@@ -177,21 +186,19 @@ def execute(
177186

178187
step = Path("step").read_text().strip()
179188
finished = "true" if int(cnt_num) == int(max_step) else "false"
180-
# poscar_dir = "poscar_dir_none" if not finished else poscar_dir
181-
# fake_traj = Path("traj_results_dir")
182-
# fake_traj.mkdir(parents=True, exist_ok=True)
189+
190+
if not Path("test_qconvex.in").exists():
191+
Path("test_qconvex.in").write_text("")
183192

184193
ret_dict = {
185194
"task_name": str(work_dir),
186195
"finished": finished,
187196
"poscar_dir": work_dir.joinpath(poscar_dir),
188-
# "input_file": ip["input_file"],
189197
"input_file": _input_file,
190198
"step": work_dir.joinpath("step"),
191199
"results": work_dir.joinpath("results"),
192-
# "fake_traj_results_dir": work_dir.joinpath(fake_traj),
200+
"qhull_input": work_dir.joinpath("test_qconvex.in"),
193201
}
194-
195202
return OPIO(ret_dict)
196203

197204
@staticmethod
@@ -219,19 +226,28 @@ def normalize_config(data={}):
219226
config_args = CollRunCaly.calypso_args
220227

221228

222-
def prep_last_calypso_file(step, results, opt_results_dir):
229+
def prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc):
    """Symlink the files of the previous CALYPSO run into the current directory.

    Every link is created in the current working directory and carries the
    base name of its target.  Each artifact is handled independently, so any
    argument that is ``None`` is simply skipped.

    Parameters
    ----------
    step : Path or None
        The step file from the last CALYPSO run.
    results : Path or None
        The results directory from the last CALYPSO run.
    opt_results_dir : Path or None
        Directory whose entries (POSCAR*, CONTCAR*, OUTCAR*) are linked one
        by one.
    qhull_input : Path or None
        The qhull input file; linked only when ``vsc`` is true.
    vsc : bool
        Whether the ``VSC`` mode is enabled in ``input.dat``.

    Raises
    ------
    FileExistsError
        If a link with the same name already exists in the current directory.
    """
    # The previous combined test `step and results or opt_results_dir`
    # dereferenced whichever argument was None as soon as only part of the
    # artifacts was supplied; guard each one on its own instead.
    if step is not None:
        Path(step.name).symlink_to(step)
    if results is not None:
        Path(results.name).symlink_to(results)
    if opt_results_dir is not None:
        for file_name in opt_results_dir.iterdir():
            Path(file_name.name).symlink_to(file_name)

    # the qhull input is only meaningful for variable-stoichiometry runs
    if vsc and qhull_input is not None:
        Path(qhull_input.name).symlink_to(qhull_input)
238+
229239

230-
def get_max_step(filename):
240+
def get_value_from_inputdat(filename):
    """Read the ``MaxStep`` and ``VSC`` settings from a CALYPSO input file.

    Each line is interpreted as ``key = value`` after dropping everything
    from the first ``#`` onwards.  Lines without ``=`` (including pure
    comment lines) are ignored, and keys are compared exactly, so a key that
    merely contains ``VSC`` or ``MaxStep`` as a substring is not picked up.

    Parameters
    ----------
    filename : str or Path
        Path of the input file (input.dat).

    Returns
    -------
    max_step : int
        Value of ``MaxStep``; ``0`` when the key is absent.
    vsc : bool
        ``True`` when a ``VSC`` entry has a value starting with ``t``/``T``;
        ``False`` otherwise or when the key is absent.

    Raises
    ------
    ValueError
        If the value of ``MaxStep`` is not an integer.
    """
    max_step = 0
    vsc = False
    with open(filename, "r") as f:
        for line in f:
            # strip the trailing comment before looking for "key = value"
            fields = line.split("#", 1)[0].split("=")
            if len(fields) < 2:
                continue
            key = fields[0].strip()
            value = fields[1].strip()
            if key == "MaxStep":
                max_step = int(value)
            elif key == "VSC" and value.lower().startswith("t"):
                vsc = True
    return max_step, vsc

dpgen2/op/run_caly_model_devi.py

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from collections import (
2+
defaultdict,
3+
)
14
from pathlib import (
25
Path,
36
)
@@ -45,8 +48,8 @@ def get_output_sign(cls):
4548
return OPIOSign(
4649
{
4750
"task_name": Parameter(str),
48-
"traj": Artifact(Path),
49-
"model_devi": Artifact(Path),
51+
"traj": Artifact(List[Path]),
52+
"model_devi": Artifact(List[Path]),
5053
}
5154
)
5255

@@ -71,8 +74,8 @@ def execute(
7174
Any
7275
Output dict with components:
7376
- `task_name`: (`str`) The name of task.
74-
- `traj`: (`Artifact(Path)`) The output trajectory.
75-
- `model_devi`: (`Artifact(Path)`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
77+
- `traj`: (`Artifact(List[Path])`) The output trajectory.
78+
- `model_devi`: (`Artifact(List[Path])`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
7679
7780
"""
7881

@@ -92,45 +95,70 @@ def execute(
9295
traj_dirs = ip["traj_dirs"]
9396
traj_dirs = [traj_dir.resolve() for traj_dir in traj_dirs]
9497

95-
dump_file_name = "traj.dump"
96-
model_devi_file_name = "model_devi.out"
98+
dump_file_name = "traj.%d.dump"
99+
model_devi_file_name = "model_devi.%d.out"
97100

98-
Devis = []
99101
tcount = 0
100102
with set_directory(work_dir):
101-
dump_file = Path().joinpath(dump_file_name)
102-
model_devi_file = Path().joinpath(model_devi_file_name)
103-
f = open(dump_file, "a")
103+
dump_str_dict = defaultdict(list) # key: natoms, value: dump_strs
104+
devis_dict = defaultdict(list) # key: natoms, value: Devis-s
104105
for traj_dir in traj_dirs:
105106
for traj_name in traj_dir.rglob("*.traj"):
106107
atoms_list = parse_traj(traj_name)
107108
if atoms_list is None:
108109
continue
109110
for atoms in atoms_list:
110-
dump_str = atoms2lmpdump(atoms, tcount, type_map)
111-
f.write(dump_str)
111+
natoms = len(atoms)
112+
dump_str = atoms2lmpdump(atoms, tcount, type_map, ignore=True)
113+
dump_str_dict[natoms].append(dump_str)
114+
112115
pbc = np.all(atoms.get_pbc())
113116
coord = atoms.get_positions().reshape(1, -1)
114117
cell = atoms.get_cell().array.reshape(1, -1) if pbc else None
115118
atype = [type_map.index(atom.symbol) for atom in atoms] # type: ignore
116119
devi = calc_model_devi(coord, cell, atype, graphs)[0]
117-
devi[0] = tcount
118-
Devis.append(devi)
120+
devis_dict[natoms].append(devi)
119121
tcount += 1
120-
f.close()
121-
Devis = np.vstack(Devis)
122-
write_model_devi_out(Devis, model_devi_file)
122+
123+
traj_file_list = []
124+
model_devi_file_list = []
125+
keys = dump_str_dict.keys()
126+
for key in keys:
127+
dump_file = Path().joinpath(dump_file_name % key)
128+
model_devi_file = Path().joinpath(model_devi_file_name % key)
129+
130+
traj_str = dump_str_dict[key]
131+
model_devis = devis_dict[key]
132+
assert len(traj_str) == len(
133+
model_devis
134+
), "The length of traj_str and model_devis should be same."
135+
for idx in range(len(model_devis)):
136+
traj_str[idx] = traj_str[idx] % idx
137+
model_devis[idx][0] = idx
138+
139+
traj_str = "".join(traj_str)
140+
dump_file.write_text(traj_str)
141+
142+
model_devis = np.vstack(model_devis)
143+
write_model_devi_out(model_devis, model_devi_file)
144+
145+
traj_file_list.append(dump_file)
146+
model_devi_file_list.append(model_devi_file)
147+
148+
for idx in range(len(traj_file_list)):
149+
traj_file_list[idx] = work_dir / traj_file_list[idx]
150+
model_devi_file_list[idx] = work_dir / model_devi_file_list[idx]
123151

124152
ret_dict = {
125153
"task_name": str(work_dir),
126-
"traj": work_dir / dump_file,
127-
"model_devi": work_dir / model_devi_file,
154+
"traj": traj_file_list,
155+
"model_devi": model_devi_file_list,
128156
}
129157

130158
return OPIO(ret_dict)
131159

132160

133-
def atoms2lmpdump(atoms, struc_idx, type_map):
161+
def atoms2lmpdump(atoms, struc_idx, type_map, ignore=False):
134162
"""down triangle cell can be obtained from
135163
cell params: a, b, c, alpha, beta, gamma.
136164
cell = cellpar_to_cell([a, b, c, alpha, beta, gamma])
@@ -154,7 +182,10 @@ def atoms2lmpdump(atoms, struc_idx, type_map):
154182
)
155183

156184
dump_str = "ITEM: TIMESTEP\n"
157-
dump_str += f"{struc_idx}\n"
185+
if not ignore:
186+
dump_str += f"{struc_idx}\n"
187+
else:
188+
dump_str += "%d\n"
158189
dump_str += "ITEM: NUMBER OF ATOMS\n"
159190
dump_str += f"{atoms.get_global_number_of_atoms()}\n"
160191

dpgen2/superop/caly_evo_step.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def __init__(
7373
"results": InputArtifact(optional=True),
7474
"step": InputArtifact(optional=True),
7575
"opt_results_dir": InputArtifact(optional=True),
76+
"qhull_input": InputArtifact(optional=True),
7677
}
7778
self._output_parameters = {
7879
# "task_name": OutputParameter(),
@@ -177,6 +178,7 @@ def _caly_evo_step(
177178
"step": caly_evo_step_steps.inputs.artifacts["step"],
178179
"results": caly_evo_step_steps.inputs.artifacts["results"],
179180
"opt_results_dir": caly_evo_step_steps.inputs.artifacts["opt_results_dir"],
181+
"qhull_input": caly_evo_step_steps.inputs.artifacts["qhull_input"],
180182
},
181183
key="%s--collect-run-calypso-%s-%s"
182184
% (
@@ -245,13 +247,14 @@ def _caly_evo_step(
245247
], # input.dat
246248
"results": collect_run_calypso.outputs.artifacts["results"],
247249
"step": collect_run_calypso.outputs.artifacts["step"],
250+
"qhull_input": collect_run_calypso.outputs.artifacts["qhull_input"],
248251
"opt_results_dir": prep_run_dp_optim.outputs.artifacts["optim_results_dir"],
249252
"caly_run_opt_file": prep_run_dp_optim.outputs.artifacts[
250253
"caly_run_opt_file"
251-
], # input.dat
254+
],
252255
"caly_check_opt_file": prep_run_dp_optim.outputs.artifacts[
253256
"caly_check_opt_file"
254-
], # input.dat
257+
],
255258
},
256259
when="%s == false" % (collect_run_calypso.outputs.parameters["finished"]),
257260
)

dpgen2/superop/prep_run_calypso.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ def _prep_run_caly(
213213
"results": temp_value,
214214
"step": temp_value,
215215
"opt_results_dir": temp_value,
216+
"qhull_input": temp_value,
216217
},
217218
key=step_keys["caly-evo-step-{{item}}"],
218219
with_sequence=argo_sequence(

tests/mocked_ops.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,11 @@ def execute(
969969
work_dir = Path(ip["task_name"])
970970
work_dir.mkdir(exist_ok=True, parents=True)
971971

972+
qhull_input = (
973+
ip["qhull_input"].resolve()
974+
if ip["qhull_input"] is not None
975+
else ip["qhull_input"]
976+
)
972977
step = ip["step"].resolve() if ip["step"] is not None else ip["step"]
973978
results = (
974979
ip["results"].resolve() if ip["results"] is not None else ip["results"]
@@ -999,6 +1004,9 @@ def execute(
9991004
step_num = Path("step").read_text().strip()
10001005
Path("step").write_text(f"{int(step_num)+1}")
10011006

1007+
if qhull_input is None:
1008+
Path("test_qconvex.in").write_text("")
1009+
10021010
step_num = int(Path("step").read_text().strip())
10031011

10041012
if results is None:
@@ -1031,6 +1039,7 @@ def execute(
10311039
"input_file": work_dir.joinpath(input_file.name),
10321040
"results": work_dir.joinpath("results"),
10331041
"step": work_dir.joinpath("step"),
1042+
"qhull_input": work_dir.joinpath("test_qconvex.in"),
10341043
}
10351044
return OPIO(ret_dict)
10361045

@@ -1157,7 +1166,7 @@ def execute(
11571166
return OPIO(
11581167
{
11591168
"task_name": str(work_dir),
1160-
"traj": work_dir / dump_file_name,
1161-
"model_devi": work_dir / model_devi_file_name,
1169+
"traj": [work_dir / dump_file_name],
1170+
"model_devi": [work_dir / model_devi_file_name],
11621171
}
11631172
)

tests/op/test_collect_run_caly.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
calypso_input_file,
2929
calypso_log_name,
3030
)
31-
from dpgen2.op.collect_run_caly import CollRunCaly, get_max_step
31+
from dpgen2.op.collect_run_caly import CollRunCaly, get_value_from_inputdat
3232
from dpgen2.utils import (
3333
BinaryFileInput,
3434
)
@@ -44,7 +44,7 @@ def setUp(self):
4444
self.input_file_path = Path("input_file")
4545
self.input_file_path.mkdir(parents=True, exist_ok=True)
4646
self.input_file = self.input_file_path.joinpath(calypso_input_file)
47-
self.input_file.write_text("input.dat\nMaxStep=3\n")
47+
self.input_file.write_text("input.dat\nMaxStep=3\nVSC= T\n")
4848

4949
self.step_file = self.input_file_path.joinpath("step")
5050
self.step_file.write_text("3")
@@ -69,12 +69,15 @@ def tearDown(self):
6969
shutil.rmtree(Path(self.task_name), ignore_errors=True)
7070

7171
def test_get_max_step(self):
    """Check parsing of ``MaxStep`` and ``VSC``, and the defaults when absent."""
    max_step, vsc = get_value_from_inputdat(self.input_file)
    # assertEqual / assertIs report the offending value on failure,
    # unlike assertTrue(a == b)
    self.assertEqual(max_step, 3)
    self.assertIs(vsc, True)

    # a file defining neither key falls back to the default values
    temp_input_file = self.input_file_path.joinpath("temp_input_dat")
    temp_input_file.write_text("input.dat\n")
    max_step, vsc = get_value_from_inputdat(temp_input_file)
    self.assertEqual(max_step, 0)
    self.assertIs(vsc, False)
7881

7982
@patch("dpgen2.op.collect_run_caly.run_command")
8083
def test_step_st_maxstep_01(self, mocked_run):
@@ -109,6 +112,7 @@ def side_effect(*args, **kwargs):
109112
self.assertEqual(out["input_file"], self.input_file)
110113
self.assertEqual(out["step"], Path(self.task_name) / "step")
111114
self.assertEqual(out["results"], Path(self.task_name) / "results")
115+
self.assertEqual(out["qhull_input"], Path(self.task_name) / "test_qconvex.in")
112116
self.assertEqual(out["finished"], "false")
113117

114118
@patch("dpgen2.op.collect_run_caly.run_command")

0 commit comments

Comments
 (0)