Skip to content

Commit ff6f410

Browse files
authored
Merge pull request #395 from anyangml2nd/feat/support-pressure-tasks
Feat: support pressure task
2 parents 46ea8d0 + 8d2088a commit ff6f410

7 files changed

Lines changed: 119 additions & 1 deletion

File tree

lambench/metrics/downstream_tasks_metrics.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,7 @@ rxn_barrier:
3232
domain: Molecules
3333
metrics: [MAE]
3434
dummy: {"MAE": 20.975}
35+
pressure:
36+
domain: Inorganic Materials
37+
metrics: [MAE]
38+
dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures

lambench/metrics/post_process.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel):
120120
"vacancy",
121121
"binding_energy",
122122
"rxn_barrier",
123+
"pressure",
123124
]:
124125
applicability_results[record.task_name] = record.metrics
125126
return applicability_results

lambench/metrics/results/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Large atomistic models (LAM), also known as machine learning interatomic potenti
1919
The following changes have been made compared to the previously released version v0.3.1:
2020
- Added new models: MACE-MH-1, DPA-3.2-5M
2121
- Updated `Force Field Prediction` tasks, and for the domain of `Molecules`, two sets of labels were provided to support OMol25-trained models.
22-
- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, and reaction energy barrier prediction.
22+
- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, reaction energy barrier prediction, and volume prediction from materials under pressure.
2323

2424
<span style="color:red">⚠️ Note: To assess full LAM capacity, we use OMat24-trained task heads for *Force Field Prediction* in Inorganic Materials and Catalysis, and OMol25-trained task heads for Molecules, when available. As for *Property Calculation*, we follow a similar approach, but use OC20-trained task heads for Catalysis when available, as this tends to yield better performance.</span>
2525

lambench/metrics/results/metadata.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,22 @@
933933
"DISPLAY_NAME": "Success Rate",
934934
"DESCRIPTION": "The success rate of reaction barrier calculations."
935935
}
936+
},
937+
"pressure": {
938+
"DISPLAY_NAME": "Pressurized Materials",
939+
"DESCRIPTION": "Evaluation of the volume over 45 structures at elevated pressure from 25 GPa to 150 GPa. Structures are obtained from `Antoine Loew et al 2026 J. Phys. Mater. 9 015010. https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8.` ",
940+
"MAE": {
941+
"DISPLAY_NAME": "MAE (Å^3/atom)",
942+
"DESCRIPTION": "The mean absolute error of the volume per atom across all configurations and all pressures."
943+
},
944+
"RMSE": {
945+
"DISPLAY_NAME": "RMSE (Å^3/atom)",
946+
"DESCRIPTION": "The root mean squared error of the volume per atom across all configurations and all pressures."
947+
},
948+
"success_rate":{
949+
"DISPLAY_NAME": "Success Rate",
950+
"DESCRIPTION": "The success rate of volume calculations at elevated pressures."
951+
}
936952
}
937953
},
938954
"adaptability_results": {

lambench/models/ase_models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,13 @@ def evaluate(
308308

309309
assert task.test_data is not None
310310
return {"metrics": run_inference(self, task.test_data)}
311+
elif task.task_name == "pressure":
312+
from lambench.tasks.calculator.pressure.pressure import run_inference
313+
314+
assert task.test_data is not None
315+
fmax = task.calculator_params.get("fmax", 1e-3)
316+
max_steps = task.calculator_params.get("max_steps", 500)
317+
return {"metrics": run_inference(self, task.test_data, fmax, max_steps)}
311318
else:
312319
raise NotImplementedError(f"Task {task.task_name} is not implemented.")
313320

lambench/tasks/calculator/calculator_tasks.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,8 @@ binding_energy:
3737
rxn_barrier:
3838
test_data: /bohr/lambench-BH876-uplk/v1/BH876
3939
calculator_params: null
40+
pressure:
41+
test_data: /bohr/lambench-pressure-arjy/v1
42+
calculator_params:
43+
fmax: 0.001
44+
max_steps: 500
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# ruff: noqa: E402
2+
"""
3+
The test data is obtained from the following paper:
4+
5+
Antoine Loew et al 2026 J. Phys. Mater. 9 015010 Universal machine learning potentials under pressure
6+
DOI 10.1088/2515-7639/ae2ba8
7+
8+
We downsampled the original test set to 45 structures at each pressure point (25, 50, 75, 100, 125, 150 GPa)
9+
"""
10+
11+
from ase.io import read
12+
from ase import Atoms
13+
from ase.calculators.calculator import Calculator
14+
from ase.optimize import FIRE
15+
from ase.filters import FrechetCellFilter
16+
from pathlib import Path
17+
from tqdm import tqdm
18+
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
19+
from lambench.models.ase_models import ASEModel
20+
import logging
21+
22+
# Conversion factor from kbar to eV/A^3 (1 eV/A^3 is roughly 1602.18 kbar).
KBAR_2_EVA3 = 6.2415e-4
23+
# Conversion factor from GPa to kbar (1 GPa = 10 kbar).
GPA_2_KBAR = 10
24+
25+
26+
def optimize(structure: Atoms, target_p: float, fmax: float, steps: int) -> Atoms:
    """Relax ``structure`` through a cell filter at a fixed external pressure.

    Args:
        structure: Structure to relax. The caller is expected to have attached
            a calculator to it beforehand.
        target_p: Target scalar pressure in GPa.
        fmax: Convergence threshold forwarded to the optimizer's ``run``.
        steps: Maximum number of optimizer steps.

    Returns:
        The atoms object held by the cell filter once the run has finished.
        The optimizer's return value is not inspected here.
    """
    # The filter takes the scalar pressure in eV/A^3: GPa -> kbar -> eV/A^3.
    pressure_ev_per_a3 = target_p * GPA_2_KBAR * KBAR_2_EVA3
    filtered = FrechetCellFilter(structure, scalar_pressure=pressure_ev_per_a3)
    FIRE(filtered).run(fmax=fmax, steps=steps)
    return filtered.atoms
32+
33+
34+
def test_one(
    init: Atoms,
    final: Atoms,
    target_p: float,
    calc: Calculator,
    fmax: float,
    max_steps: int,
) -> tuple[float, float]:
    """Relax one structure with ``calc`` and report volumes per atom.

    Args:
        init: Starting structure; ``calc`` is attached to it and it is relaxed.
        final: Reference structure whose volume serves as the label.
        target_p: Target pressure in GPa (truncated to an integer before use).
        calc: Calculator used for the relaxation.
        fmax: Force convergence threshold for the relaxation.
        max_steps: Maximum number of optimizer steps.

    Returns:
        A ``(reference, predicted)`` pair of volumes per atom, both divided by
        the number of atoms in ``init``.
    """
    init.calc = calc
    relaxed = optimize(init, int(target_p), fmax, max_steps)
    atom_count = len(init)
    reference_volume = final.get_volume() / atom_count
    predicted_volume = relaxed.get_volume() / atom_count
    return reference_volume, predicted_volume
46+
47+
48+
def run_inference(
    model: ASEModel,
    test_data: Path,
    fmax: float,
    max_steps: int,
) -> dict[str, float]:
    """Evaluate volume-per-atom errors for structures relaxed under pressure.

    For each pressure point (25 to 150 GPa in 25 GPa steps) the trajectory
    ``P<pressure>.traj`` under ``test_data`` is read, every structure is
    relaxed with the model's calculator via ``test_one``, and the resulting
    volume per atom is compared with the reference volume per atom.

    Args:
        model: Model whose ``calc`` attribute provides the ASE calculator.
        test_data: Directory containing the ``P025.traj`` ... ``P150.traj`` files.
        fmax: Force convergence threshold forwarded to the relaxation.
        max_steps: Maximum number of optimizer steps per structure.

    Returns:
        A dict with ``MAE`` and ``RMSE`` (A^3/atom) over the structures that
        were evaluated without raising, and ``success_rate``, the fraction of
        attempted structures that were evaluated without raising. If no
        structure succeeded, ``MAE`` and ``RMSE`` are NaN and ``success_rate``
        is 0.0.

    Raises:
        AssertionError: If paired structures differ in chemical formula.
    """
    calc = model.calc
    all_labels = []
    all_preds = []
    num_fails = 0

    for pressure in tqdm(["025", "050", "075", "100", "125", "150"]):
        traj_path = test_data / f"P{pressure}.traj"
        # NOTE(review): the initial and reference structures are read from the
        # same file, so the second read presumably just provides an independent
        # copy that the relaxation does not modify -- confirm this is intended.
        init_traj = read(traj_path, ":")
        final_traj = read(traj_path, ":")
        paired = zip(init_traj, final_traj)
        for i, (init, final) in enumerate(tqdm(paired, total=len(init_traj))):
            assert init.get_chemical_formula() == final.get_chemical_formula()
            try:
                dft, lam = test_one(init, final, int(pressure), calc, fmax, max_steps)
            except Exception as e:
                # Best effort: a failure on one structure must not abort the
                # whole benchmark; it is counted against the success rate.
                logging.error(
                    f"Error during test_one at pressure {pressure}, index {i}: {e}"
                )
                num_fails += 1
                continue
            all_labels.append(dft)
            all_preds.append(lam)

    num_success = len(all_labels)
    num_total = num_success + num_fails
    if num_success == 0:
        # The error metrics are undefined without any successful sample;
        # report that explicitly instead of failing inside the metric helpers.
        return {
            "MAE": float("nan"),
            "RMSE": float("nan"),
            "success_rate": 0.0,
        }

    return {
        "MAE": float(mean_absolute_error(all_labels, all_preds)),  # A3/atom
        "RMSE": float(root_mean_squared_error(all_labels, all_preds)),  # A3/atom
        # Successes over attempts; failures are not included in num_success.
        "success_rate": num_success / num_total,
    }

0 commit comments

Comments
 (0)