Spaces:
Running
Running
Yuan (Cyrus) Chiang
committed on
High-throughput EOS flow on alloy systems (#30)
Browse files* refactor eos into task
* task name and cache
* avoid duplicate test on PR branches
* more returned data in eos
* return b1
* minor md test change
* add eos-alloy input file
* add symmetry
* support disabling concurrency
* optimize eos, md, opt tasks to handle exceptions; first successful HT eos-alloy flow
* change cache policy
* dictionary items
* add pytable dependency; add FIRE2 optimizer
* fixed hdf5 output hook
* rename task folder
* add example notebook
* increase method string length for hd5 store
* use file path
* implement safe hdf store to avoid inconsistencies from multiple processes
* add references in notebook and flow
* frechet filter in eos flow; add ternary plot
- mlip_arena/data/__init__.py +0 -0
- mlip_arena/data/local.py +25 -0
- mlip_arena/models/__init__.py +1 -1
- mlip_arena/models/registry.yaml +7 -3
- mlip_arena/tasks/diatomics/run.ipynb +0 -0
- mlip_arena/tasks/eos.py +71 -39
- mlip_arena/tasks/eos_alloy/__init__.py +0 -0
- mlip_arena/tasks/eos_alloy/flow.py +177 -0
- mlip_arena/tasks/eos_alloy/input.py +132 -0
- mlip_arena/tasks/eos_alloy/run.ipynb +0 -0
- mlip_arena/tasks/md.py +4 -3
- mlip_arena/tasks/optimize.py +11 -8
- mlip_arena/tasks/registry.yaml +2 -2
- pyproject.toml +1 -0
- tests/test_eos.py +1 -0
mlip_arena/data/__init__.py
ADDED
File without changes
|
mlip_arena/data/local.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
|
4 |
+
from pandas import HDFStore
|
5 |
+
|
6 |
+
# https://stackoverflow.com/questions/22522551/pandas-hdf5-as-a-database/29014295#29014295
|
7 |
+
|
8 |
+
|
9 |
+
class SafeHDFStore(HDFStore):
    """HDFStore guarded by an exclusive ``<path>.lock`` file.

    The constructor creates the lock file with ``O_CREAT | O_EXCL`` and, while
    the file already exists, sleeps ``probe_interval`` seconds between
    attempts. The lock is removed when the context manager exits, or
    immediately if opening the underlying store fails.

    Args:
        *args: Forwarded to ``HDFStore``; the first positional argument (or
            the ``path`` keyword) is the store path the lock name derives from.
        probe_interval: Seconds to sleep between lock attempts (default 1).
        **kwargs: Forwarded to ``HDFStore``.

    Note:
        Only ``__exit__`` releases the lock, so use the store as a context
        manager (``with SafeHDFStore(...) as store:``).
    """

    def __init__(self, *args, **kwargs):
        probe_interval = kwargs.pop("probe_interval", 1)
        # Accept the path either positionally or as a keyword argument.
        path = args[0] if args else kwargs["path"]
        self._lock = f"{path}.lock"
        self._flock = None
        while True:
            try:
                # O_EXCL makes creation fail if another process holds the lock.
                self._flock = os.open(self._lock, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
                break
            except FileExistsError:
                time.sleep(probe_interval)

        try:
            HDFStore.__init__(self, *args, **kwargs)
        except BaseException:
            # Never leave a stale lock behind: it would block every other
            # process waiting in the loop above indefinitely.
            self._release_lock()
            raise

    def __exit__(self, *args, **kwargs):
        try:
            HDFStore.__exit__(self, *args, **kwargs)
        finally:
            # Release even if closing the store raised.
            self._release_lock()

    def _release_lock(self):
        """Close and delete the lock file; does nothing if already released."""
        if self._flock is None:
            return
        os.close(self._flock)
        self._flock = None
        os.remove(self._lock)
|
mlip_arena/models/__init__.py
CHANGED
@@ -22,7 +22,7 @@ for model, metadata in REGISTRY.items():
|
|
22 |
try:
|
23 |
module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
|
24 |
MLIPMap[model] = getattr(module, metadata["class"])
|
25 |
-
except (ModuleNotFoundError, AttributeError) as e:
|
26 |
print(e)
|
27 |
continue
|
28 |
|
|
|
22 |
try:
|
23 |
module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
|
24 |
MLIPMap[model] = getattr(module, metadata["class"])
|
25 |
+
except (ModuleNotFoundError, AttributeError, ValueError) as e:
|
26 |
print(e)
|
27 |
continue
|
28 |
|
mlip_arena/models/registry.yaml
CHANGED
@@ -10,6 +10,7 @@ MACE-MP(M):
|
|
10 |
datasets:
|
11 |
- MPTrj # TODO: fake HF dataset repo
|
12 |
cpu-tasks:
|
|
|
13 |
gpu-tasks:
|
14 |
- homonuclear-diatomics
|
15 |
- stability
|
@@ -79,6 +80,8 @@ MatterSim:
|
|
79 |
datasets:
|
80 |
- MPTrj
|
81 |
- Alexandria
|
|
|
|
|
82 |
gpu-tasks:
|
83 |
- homonuclear-diatomics
|
84 |
github: https://github.com/microsoft/mattersim
|
@@ -101,7 +104,6 @@ ORB:
|
|
101 |
datasets:
|
102 |
- MPTrj # TODO: fake HF dataset repo
|
103 |
- Alexandria
|
104 |
-
cpu-tasks:
|
105 |
gpu-tasks:
|
106 |
- homonuclear-diatomics
|
107 |
- combustion
|
@@ -125,7 +127,6 @@ SevenNet:
|
|
125 |
datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
|
126 |
datasets:
|
127 |
- MPTrj # TODO: fake HF dataset repo
|
128 |
-
cpu-tasks:
|
129 |
gpu-tasks:
|
130 |
- homonuclear-diatomics
|
131 |
- stability
|
@@ -151,6 +152,8 @@ eqV2(OMat):
|
|
151 |
- OMat
|
152 |
- MPTrj
|
153 |
- Alexandria
|
|
|
|
|
154 |
gpu-tasks:
|
155 |
- homonuclear-diatomics
|
156 |
prediction: EFS
|
@@ -238,7 +241,6 @@ MACE-OFF(M):
|
|
238 |
datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
|
239 |
datasets:
|
240 |
- SPICE # TODO: fake HF dataset repo
|
241 |
-
cpu-tasks:
|
242 |
gpu-tasks:
|
243 |
- homonuclear-diatomics
|
244 |
github: https://github.com/ACEsuit/mace
|
@@ -302,6 +304,8 @@ ORBv2:
|
|
302 |
datasets:
|
303 |
- MPTrj
|
304 |
- Alexandria
|
|
|
|
|
305 |
gpu-tasks:
|
306 |
- homonuclear-diatomics
|
307 |
github: https://github.com/orbital-materials/orb-models
|
|
|
10 |
datasets:
|
11 |
- MPTrj # TODO: fake HF dataset repo
|
12 |
cpu-tasks:
|
13 |
+
- eos_alloy
|
14 |
gpu-tasks:
|
15 |
- homonuclear-diatomics
|
16 |
- stability
|
|
|
80 |
datasets:
|
81 |
- MPTrj
|
82 |
- Alexandria
|
83 |
+
cpu-tasks:
|
84 |
+
- eos_alloy
|
85 |
gpu-tasks:
|
86 |
- homonuclear-diatomics
|
87 |
github: https://github.com/microsoft/mattersim
|
|
|
104 |
datasets:
|
105 |
- MPTrj # TODO: fake HF dataset repo
|
106 |
- Alexandria
|
|
|
107 |
gpu-tasks:
|
108 |
- homonuclear-diatomics
|
109 |
- combustion
|
|
|
127 |
datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
|
128 |
datasets:
|
129 |
- MPTrj # TODO: fake HF dataset repo
|
|
|
130 |
gpu-tasks:
|
131 |
- homonuclear-diatomics
|
132 |
- stability
|
|
|
152 |
- OMat
|
153 |
- MPTrj
|
154 |
- Alexandria
|
155 |
+
cpu-tasks:
|
156 |
+
- eos_alloy
|
157 |
gpu-tasks:
|
158 |
- homonuclear-diatomics
|
159 |
prediction: EFS
|
|
|
241 |
datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
|
242 |
datasets:
|
243 |
- SPICE # TODO: fake HF dataset repo
|
|
|
244 |
gpu-tasks:
|
245 |
- homonuclear-diatomics
|
246 |
github: https://github.com/ACEsuit/mace
|
|
|
304 |
datasets:
|
305 |
- MPTrj
|
306 |
- Alexandria
|
307 |
+
cpu-tasks:
|
308 |
+
- eos_alloy
|
309 |
gpu-tasks:
|
310 |
- homonuclear-diatomics
|
311 |
github: https://github.com/orbital-materials/orb-models
|
mlip_arena/tasks/diatomics/run.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
mlip_arena/tasks/eos.py
CHANGED
@@ -6,13 +6,14 @@ https://github.com/materialsvirtuallab/matcalc/blob/main/matcalc/eos.py
|
|
6 |
|
7 |
from __future__ import annotations
|
8 |
|
9 |
-
from typing import TYPE_CHECKING
|
10 |
|
11 |
import numpy as np
|
12 |
from prefect import task
|
|
|
13 |
from prefect.futures import wait
|
14 |
from prefect.runtime import task_run
|
15 |
-
from prefect.
|
16 |
|
17 |
from ase import Atoms
|
18 |
from ase.filters import * # type: ignore
|
@@ -39,21 +40,23 @@ def _generate_task_run_name():
|
|
39 |
@task(
|
40 |
name="EOS",
|
41 |
task_run_name=_generate_task_run_name,
|
42 |
-
|
|
|
43 |
)
|
44 |
def run(
|
45 |
atoms: Atoms,
|
46 |
calculator_name: str | MLIPEnum,
|
47 |
-
calculator_kwargs: dict | None,
|
48 |
device: str | None = None,
|
49 |
optimizer: Optimizer | str = "BFGSLineSearch", # type: ignore
|
50 |
optimizer_kwargs: dict | None = None,
|
51 |
-
filter: Filter | str | None =
|
52 |
filter_kwargs: dict | None = None,
|
53 |
criterion: dict | None = None,
|
54 |
max_abs_strain: float = 0.1,
|
55 |
npoints: int = 11,
|
56 |
-
|
|
|
57 |
"""
|
58 |
Compute the equation of state (EOS) for the given atoms and calculator.
|
59 |
|
@@ -69,11 +72,12 @@ def run(
|
|
69 |
criterion: The criterion to use.
|
70 |
max_abs_strain: The maximum absolute strain to use.
|
71 |
npoints: The number of points to sample.
|
|
|
72 |
|
73 |
Returns:
|
74 |
-
A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy.
|
75 |
"""
|
76 |
-
|
77 |
atoms=atoms,
|
78 |
calculator_name=calculator_name,
|
79 |
calculator_kwargs=calculator_kwargs,
|
@@ -83,8 +87,14 @@ def run(
|
|
83 |
filter=filter,
|
84 |
filter_kwargs=filter_kwargs,
|
85 |
criterion=criterion,
|
|
|
86 |
)
|
87 |
|
|
|
|
|
|
|
|
|
|
|
88 |
relaxed = first_relax["atoms"]
|
89 |
|
90 |
# p0 = relaxed.get_positions()
|
@@ -92,37 +102,57 @@ def run(
|
|
92 |
|
93 |
factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
for f in
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
volumes, energies = map(
|
128 |
list,
|
@@ -136,6 +166,8 @@ def run(
|
|
136 |
bm.fit()
|
137 |
|
138 |
return {
|
|
|
|
|
139 |
"eos": {"volumes": volumes, "energies": energies},
|
140 |
"K": bm.b0_GPa,
|
141 |
"b0": bm.b0,
|
|
|
6 |
|
7 |
from __future__ import annotations
|
8 |
|
9 |
+
from typing import TYPE_CHECKING, Any
|
10 |
|
11 |
import numpy as np
|
12 |
from prefect import task
|
13 |
+
from prefect.cache_policies import INPUTS, TASK_SOURCE
|
14 |
from prefect.futures import wait
|
15 |
from prefect.runtime import task_run
|
16 |
+
from prefect.states import State
|
17 |
|
18 |
from ase import Atoms
|
19 |
from ase.filters import * # type: ignore
|
|
|
40 |
@task(
|
41 |
name="EOS",
|
42 |
task_run_name=_generate_task_run_name,
|
43 |
+
cache_policy=TASK_SOURCE + INPUTS
|
44 |
+
# cache_key_fn=task_input_hash,
|
45 |
)
|
46 |
def run(
|
47 |
atoms: Atoms,
|
48 |
calculator_name: str | MLIPEnum,
|
49 |
+
calculator_kwargs: dict | None = None,
|
50 |
device: str | None = None,
|
51 |
optimizer: Optimizer | str = "BFGSLineSearch", # type: ignore
|
52 |
optimizer_kwargs: dict | None = None,
|
53 |
+
filter: Filter | str | None = "FrechetCell", # type: ignore
|
54 |
filter_kwargs: dict | None = None,
|
55 |
criterion: dict | None = None,
|
56 |
max_abs_strain: float = 0.1,
|
57 |
npoints: int = 11,
|
58 |
+
concurrent: bool = True,
|
59 |
+
) -> dict[str, Any] | State:
|
60 |
"""
|
61 |
Compute the equation of state (EOS) for the given atoms and calculator.
|
62 |
|
|
|
72 |
criterion: The criterion to use.
|
73 |
max_abs_strain: The maximum absolute strain to use.
|
74 |
npoints: The number of points to sample.
|
75 |
+
concurrent: Whether to relax multiple structures concurrently.
|
76 |
|
77 |
Returns:
|
78 |
+
A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy if successful. Otherwise, a prefect state object.
|
79 |
"""
|
80 |
+
state = OPT(
|
81 |
atoms=atoms,
|
82 |
calculator_name=calculator_name,
|
83 |
calculator_kwargs=calculator_kwargs,
|
|
|
87 |
filter=filter,
|
88 |
filter_kwargs=filter_kwargs,
|
89 |
criterion=criterion,
|
90 |
+
return_state=True,
|
91 |
)
|
92 |
|
93 |
+
if state.is_failed():
|
94 |
+
return state
|
95 |
+
|
96 |
+
first_relax = state.result(raise_on_failure=False)
|
97 |
+
assert isinstance(first_relax, dict)
|
98 |
relaxed = first_relax["atoms"]
|
99 |
|
100 |
# p0 = relaxed.get_positions()
|
|
|
102 |
|
103 |
factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
|
104 |
|
105 |
+
if concurrent:
|
106 |
+
futures = []
|
107 |
+
for f in factors:
|
108 |
+
atoms = relaxed.copy()
|
109 |
+
atoms.set_cell(c0 * f, scale_atoms=True)
|
110 |
+
|
111 |
+
future = OPT.submit(
|
112 |
+
atoms=atoms,
|
113 |
+
calculator_name=calculator_name,
|
114 |
+
calculator_kwargs=calculator_kwargs,
|
115 |
+
device=device,
|
116 |
+
optimizer=optimizer,
|
117 |
+
optimizer_kwargs=optimizer_kwargs,
|
118 |
+
filter=None,
|
119 |
+
filter_kwargs=None,
|
120 |
+
criterion=criterion,
|
121 |
+
)
|
122 |
+
futures.append(future)
|
123 |
+
|
124 |
+
wait(futures)
|
125 |
+
|
126 |
+
results = [
|
127 |
+
f.result(raise_on_failure=False)
|
128 |
+
for f in futures
|
129 |
+
if future.state.is_completed()
|
130 |
+
]
|
131 |
+
else:
|
132 |
+
states = []
|
133 |
+
for f in factors:
|
134 |
+
atoms = relaxed.copy()
|
135 |
+
atoms.set_cell(c0 * f, scale_atoms=True)
|
136 |
+
|
137 |
+
state = OPT(
|
138 |
+
atoms=atoms,
|
139 |
+
calculator_name=calculator_name,
|
140 |
+
calculator_kwargs=calculator_kwargs,
|
141 |
+
device=device,
|
142 |
+
optimizer=optimizer,
|
143 |
+
optimizer_kwargs=optimizer_kwargs,
|
144 |
+
filter=None,
|
145 |
+
filter_kwargs=None,
|
146 |
+
criterion=criterion,
|
147 |
+
return_state=True,
|
148 |
+
)
|
149 |
+
states.append(state)
|
150 |
+
results = [
|
151 |
+
s.result(raise_on_failure=False) for s in states if state.is_completed()
|
152 |
+
]
|
153 |
+
|
154 |
+
volumes = [f["atoms"].get_volume() for f in results]
|
155 |
+
energies = [f["atoms"].get_potential_energy() for f in results]
|
156 |
|
157 |
volumes, energies = map(
|
158 |
list,
|
|
|
166 |
bm.fit()
|
167 |
|
168 |
return {
|
169 |
+
"atoms": relaxed,
|
170 |
+
"calculator_name": calculator_name,
|
171 |
"eos": {"volumes": volumes, "energies": energies},
|
172 |
"K": bm.b0_GPa,
|
173 |
"b0": bm.b0,
|
mlip_arena/tasks/eos_alloy/__init__.py
ADDED
File without changes
|
mlip_arena/tasks/eos_alloy/flow.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import partial
|
2 |
+
from pathlib import Path
|
3 |
+
import json
|
4 |
+
|
5 |
+
import pandas as pd
|
6 |
+
from dask.distributed import Client
|
7 |
+
from dask_jobqueue import SLURMCluster
|
8 |
+
from huggingface_hub import hf_hub_download
|
9 |
+
from prefect import Task, flow, task
|
10 |
+
from prefect.client.schemas.objects import TaskRun
|
11 |
+
from prefect.futures import wait
|
12 |
+
from prefect.states import State
|
13 |
+
from prefect_dask import DaskTaskRunner
|
14 |
+
|
15 |
+
from ase.db import connect
|
16 |
+
from mlip_arena.data.local import SafeHDFStore
|
17 |
+
from mlip_arena.models import REGISTRY, MLIPEnum
|
18 |
+
from mlip_arena.tasks.eos import run as EOS
|
19 |
+
|
20 |
+
|
21 |
+
@task
def get_atoms_from_db(db_path: Path | str):
    """
    Yield every row of an ASE database as an ``Atoms`` object.

    If ``db_path`` does not exist locally, the file is fetched with
    ``hf_hub_download`` from the ``atomind/mlip-arena`` dataset repository,
    using this module's directory name as the subfolder.
    """
    local_candidate = Path(db_path)
    if local_candidate.exists():
        resolved = local_candidate
    else:
        task_folder = Path(__file__).parent.name
        resolved = hf_hub_download(
            repo_id="atomind/mlip-arena",
            repo_type="dataset",
            subfolder=f"{task_folder}",
            filename=str(local_candidate),
        )

    with connect(resolved) as db:
        yield from (row.toatoms() for row in db.select())
|
34 |
+
|
35 |
+
|
36 |
+
def _json_default(obj):
    """Serialize objects exposing ``tolist`` (e.g. numpy scalars/arrays) for ``json.dump``."""
    if hasattr(obj, "tolist"):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def save_to_hdf(
    tsk: Task, run: TaskRun, state: State, fpath: Path | str, table_name: str
):
    """
    Define a hook on completion of EOS task to save results to HDF5 file.

    For a completed run the hook writes two artifacts:

    * a JSON file ``<family>/<calculator>_<formula>.json`` next to this
      module, holding the task result without the ``atoms`` entry;
    * rows appended to ``table_name`` in an HDF5 file whose stem is
      ``fpath``'s stem suffixed with ``_<calculator>``, one row per
      volume/energy point.

    Args:
        tsk: The task the hook is attached to (unused).
        run: The task run; its state supplies the result.
        state: The state passed by the hook machinery (unused; ``run.state``
            is read instead).
        fpath: Base path of the HDF5 output file.
        table_name: Key of the table to append to inside the HDF5 store.
    """

    if not run.state.is_completed():
        return

    result = run.state.result(raise_on_failure=False)

    atoms = result["atoms"]
    calculator_name = (
        run.task_inputs["calculator_name"] or result["calculator_name"]
    )

    energies = [float(e) for e in result["eos"]["energies"]]

    formula = atoms.get_chemical_formula()

    # Scalar entries are broadcast against the per-point volume/energy lists.
    df = pd.DataFrame(
        {
            "method": calculator_name,
            "formula": formula,
            "total_run_time": run.total_run_time,
            "v0": result["v0"],
            "e0": result["e0"],
            "b0": result["b0"],
            "b1": result["b1"],
            "volume": result["eos"]["volumes"],
            "energy": energies,
        }
    )

    fpath = Path(fpath)
    fpath = fpath.with_stem(fpath.stem + f"_{calculator_name}")

    # Use the module's directory: `Path(__file__) / ...` would point beneath
    # the source file itself, which cannot be created as a directory.
    family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
    family_path.mkdir(parents=True, exist_ok=True)

    # The Atoms object is not JSON serializable, so it is left out of the
    # dump; the formula is already encoded in the file name.
    payload = {key: value for key, value in result.items() if key != "atoms"}
    with open(family_path / f"{calculator_name}_{formula}.json", "w") as f:
        json.dump(payload, f, indent=2, default=_json_default)

    # SafeHDFStore serializes writers through a lock file.
    with SafeHDFStore(fpath, mode="a") as store:
        store.append(
            table_name,
            df,
            format="table",
            data_columns=True,
            min_itemsize={"formula": 50, "method": 20},
        )
|
86 |
+
|
87 |
+
|
88 |
+
@flow
def run_from_db(
    db_path: Path | str,
    out_path: Path | str,
    table_name: str,
    optimizer="FIRE",
    optimizer_kwargs=None,
    filter="FrechetCell",
    filter_kwargs=None,
    criterion=dict(fmax=0.1, steps=1000),
    max_abs_strain=0.20,
    concurrent=False,
):
    """
    Submit an EOS task for every structure in a database and every eligible model.

    A model is eligible when its registry entry has a truthy ``npt`` value and
    lists this package's directory name under ``cpu-tasks`` or ``gpu-tasks``.
    Each completed EOS task triggers ``save_to_hdf`` with ``out_path`` and
    ``table_name``.

    Args:
        db_path: ASE database path, passed to ``get_atoms_from_db``.
        out_path: Base path of the HDF5 output, passed to ``save_to_hdf``.
        table_name: HDF5 table key, passed to ``save_to_hdf``.
        optimizer, optimizer_kwargs, filter, filter_kwargs, criterion,
        max_abs_strain, concurrent: Forwarded unchanged to the EOS task.

    Returns:
        Results of the EOS task runs whose final state is completed.
    """
    EOS_ = EOS.with_options(
        on_completion=[partial(save_to_hdf, fpath=out_path, table_name=table_name)]
    )

    # Loop-invariant: the task identifier is this package's directory name.
    task_name = Path(__file__).parent.name

    futures = []
    for atoms in get_atoms_from_db(db_path):
        for mlip in MLIPEnum:
            metadata = REGISTRY[mlip.name]
            if not metadata["npt"]:
                continue
            # `or []` also covers keys that are present but empty (null) in
            # the registry, where `.get(key, [])` would return None.
            supported_tasks = (metadata.get("cpu-tasks") or []) + (
                metadata.get("gpu-tasks") or []
            )
            if task_name not in supported_tasks:
                continue
            future = EOS_.submit(
                atoms=atoms,
                calculator_name=mlip.name,
                calculator_kwargs=dict(),
                optimizer=optimizer,
                optimizer_kwargs=optimizer_kwargs,
                filter=filter,
                filter_kwargs=filter_kwargs,
                criterion=criterion,
                max_abs_strain=max_abs_strain,
                concurrent=concurrent,
            )
            futures.append(future)

    wait(futures)

    return [
        f.result(timeout=None, raise_on_failure=False)
        for f in futures
        if f.state.is_completed()
    ]
|
136 |
+
|
137 |
+
|
138 |
+
if __name__ == "__main__":
    # Script entry point: start a SLURM-backed Dask cluster and run the flow on it.
    nodes_per_alloc = 1
    gpus_per_alloc = 4
    ntasks = 1

    cluster_kwargs = {
        "cores": 1,
        "memory": "64 GB",
        "shebang": "#!/bin/bash",
        "account": "m3828",
        "walltime": "00:30:00",
        "job_mem": "0",
        "job_script_prologue": [
            "source ~/.bashrc",
            "module load python",
            "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
        ],
        # The skipped directives are replaced by the explicit ones below.
        "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
        "job_extra_directives": [
            "-J eos",
            "-q debug",
            f"-N {nodes_per_alloc}",
            "-C gpu",
            f"-G {gpus_per_alloc}",
        ],
    }

    cluster = SLURMCluster(**cluster_kwargs)
    print(cluster.job_script())
    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
    client = Client(cluster)

    # Run the flow's tasks on the Dask scheduler created above.
    dask_runner = DaskTaskRunner(address=client.scheduler.address)
    run_from_db_ = run_from_db.with_options(
        task_runner=dask_runner,
        log_prints=True,
    )

    results = run_from_db_(
        db_path="sqs_Fe-Ni-Cr.db",
        out_path="eos.h5",
        table_name="Fe-Ni-Cr",
    )
|
mlip_arena/tasks/eos_alloy/input.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Generates a database of special quasi-random structures (SQS) from a template structure.
|
3 |
+
|
4 |
+
This script utilizes the `structuretoolkit <https://github.com/pyiron/structuretoolkit/tree/main>`_
|
5 |
+
to call `sqsgenerator <https://sqsgenerator.readthedocs.io/en/latest/index.html#>`_ to generate
|
6 |
+
SQS structures. The generated structures are saved to an ASE database file and optionally uploaded
|
7 |
+
to the Hugging Face Hub.
|
8 |
+
|
9 |
+
References
|
10 |
+
~~~~~~~~~~
|
11 |
+
- Alvi, S. M. A. A., Janssen, J., Khatamsaz, D., Perez, D., Allaire, D., & Arroyave, R. (2024).
|
12 |
+
Hierarchical Gaussian Process-Based Bayesian Optimization for Materials Discovery in High
|
13 |
+
Entropy Alloy Spaces. *arXiv preprint arXiv:2410.04314*.
|
14 |
+
- Gehringer, D., Friák, M., & Holec, D. (2023). Models of configurationally-complex alloys made
|
15 |
+
simple. *Computer Physics Communications, 286*, 108664.
|
16 |
+
|
17 |
+
Authors
|
18 |
+
~~~~~~~
|
19 |
+
- Jan Janssen (`@jan-janssen <https://github.com/jan-janssen>`_)
|
20 |
+
- Yuan Chiang (`@chiang-yuan <https://github.com/chiang-yuan>`_)
|
21 |
+
"""
|
22 |
+
|
23 |
+
import os
|
24 |
+
from pathlib import Path
|
25 |
+
|
26 |
+
import numpy as np
|
27 |
+
from dotenv import load_dotenv
|
28 |
+
from huggingface_hub import HfApi
|
29 |
+
from tqdm.auto import tqdm
|
30 |
+
|
31 |
+
from ase import Atoms
|
32 |
+
from ase.build import bulk
|
33 |
+
from ase.db import connect
|
34 |
+
|
35 |
+
|
36 |
+
def body_order(n=32, b=5):
    """
    Generate all possible combinations of atomic counts for `b` species
    that sum to `n`.

    Args:
        n: Total number of atoms to distribute.
        b: Number of species; must be at least 1.

    Returns:
        A list of length-`b` lists of non-negative integers summing to `n`,
        ordered with the first species' count increasing.

    Raises:
        ValueError: If `b` is smaller than 1.
    """
    if b < 1:
        raise ValueError(f"b must be at least 1, got {b}")
    if b == 1:
        # A single species takes every atom; this base case also terminates
        # the recursion for b == 1, which previously never reached a base case.
        return [[n]]
    return [
        [i] + rest
        for i in range(n + 1)
        for rest in body_order(n=n - i, b=b - 1)
    ]
|
44 |
+
|
45 |
+
|
46 |
+
def generate_sqs(structure_template, elements, counts):
    """
    Build one special quasi-random structure (SQS) for the given composition.

    Each count is divided by the number of sites in ``structure_template`` to
    obtain a mole fraction, and the first structure returned by
    ``structuretoolkit.build.sqs_structures`` is used.
    """
    import structuretoolkit as stk

    n_sites = len(structure_template)
    mole_fractions = {}
    for element, count in zip(elements, counts):
        mole_fractions[element] = count / n_sites

    candidates = stk.build.sqs_structures(
        structure=structure_template,
        mole_fractions=mole_fractions,
    )
    return candidates[0]
|
59 |
+
|
60 |
+
|
61 |
+
def get_endmember(structure, conc_lst, elements):
    """
    Assign a single element to all atoms in the structure to create an endmember.

    Args:
        structure: Object whose ``symbols`` supports slice assignment; it is
            modified in place.
        conc_lst: Per-element counts (array, list or tuple); the first
            non-zero entry selects the element.
        elements: Element symbols, aligned with ``conc_lst``.

    Returns:
        The same ``structure`` object, with every symbol replaced.
    """
    # Coerce to an array so `!= 0` is element-wise; with a plain list it would
    # be a single bool and index the element array incorrectly.
    present = np.asarray(conc_lst) != 0
    structure.symbols[:] = np.array(elements)[present][0]
    return structure
|
67 |
+
|
68 |
+
|
69 |
+
def generate_alloy_db(
    structure_template: Atoms,
    elements: list[str],
    local_path: Path | None = None,
    upload: bool = True,
    repo_id: str = "atomind/mlip-arena",
) -> Path:
    """
    Write one structure per composition of ``elements`` to an ASE database.

    Every way of distributing ``len(structure_template)`` atoms over the
    given elements is enumerated with ``body_order``. Compositions with
    exactly one non-zero count become endmembers; all others are generated
    with ``generate_sqs``.

    Args:
        structure_template: Template structure supplying the sites.
        elements: Element symbols to distribute over the sites.
        local_path: Database file to (re)create; a ``str`` is accepted too.
            Defaults to ``sqs_<elements joined by '-'>.db`` next to this module.
        upload: Whether to upload the database to the Hugging Face Hub.
        repo_id: Target dataset repository for the upload.

    Returns:
        Path of the written database file.

    Raises:
        ValueError: If ``upload`` is true and ``HF_TOKEN`` is not set.
    """
    # Load Hugging Face API token
    load_dotenv()
    hf_token = os.getenv("HF_TOKEN", None)

    if upload and hf_token is None:
        raise ValueError("HF_TOKEN environment variable not set.")

    num_atoms = len(structure_template)
    num_species = len(elements)

    # Generate all possible atomic configurations
    configurations = np.array(body_order(n=num_atoms, b=num_species))

    # Prepare the database; coerce so a plain string path also works with
    # the Path-only calls below (`unlink`, `.name`).
    if local_path:
        db_path = Path(local_path)
    else:
        db_path = Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
    # Start from a fresh file so structures are not appended to an old run.
    db_path.unlink(missing_ok=True)

    element_array = np.array(elements)

    # Generate and save structures
    with connect(db_path) as db:
        for composition in tqdm(configurations, total=len(configurations)):
            present = composition != 0
            if np.count_nonzero(present) != 1:
                # Mixed composition: build an SQS from the elements present.
                atoms = generate_sqs(
                    structure_template=structure_template,
                    elements=element_array[present],
                    counts=composition[present],
                )
            else:
                # Only one element present: plain substitution on a copy of
                # the template, no SQS search needed.
                atoms = get_endmember(
                    structure=structure_template.copy(),
                    conc_lst=composition,
                    elements=elements,
                )
            db.write(atoms)

    # Upload the database to Hugging Face Hub
    if upload:
        api = HfApi(token=hf_token)
        api.upload_file(
            path_or_fileobj=db_path,
            path_in_repo=f"{Path(__file__).parent.name}/{db_path.name}",
            repo_id=repo_id,
            repo_type="dataset",
        )
        print(f"Database uploaded: {db_path}")

    return db_path
|
127 |
+
|
128 |
+
|
129 |
+
if __name__ == "__main__":
    # Script entry point: a 2x2x2 repeat of the cubic Al cell (a=3.6) serves
    # as the template for the Fe-Ni-Cr database.
    unit_cell = bulk("Al", a=3.6, cubic=True)
    structure_template = unit_cell.repeat([2, 2, 2])
    elements = ["Fe", "Ni", "Cr"]
    generate_alloy_db(structure_template, elements, upload=True)
|
mlip_arena/tasks/eos_alloy/run.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
mlip_arena/tasks/md.py
CHANGED
@@ -55,14 +55,14 @@ works thereof, in binary and source code form.
|
|
55 |
from __future__ import annotations
|
56 |
|
57 |
from collections.abc import Sequence
|
58 |
-
from datetime import datetime
|
59 |
from pathlib import Path
|
60 |
from typing import Literal
|
61 |
|
62 |
import numpy as np
|
63 |
from prefect import task
|
|
|
64 |
from prefect.runtime import task_run
|
65 |
-
from prefect.tasks import task_input_hash
|
66 |
from scipy.interpolate import interp1d
|
67 |
from scipy.linalg import schur
|
68 |
from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
|
@@ -200,7 +200,8 @@ def _generate_task_run_name():
|
|
200 |
@task(
|
201 |
name="MD",
|
202 |
task_run_name=_generate_task_run_name,
|
203 |
-
|
|
|
204 |
# cache_expiration=timedelta(days=1)
|
205 |
)
|
206 |
def run(
|
|
|
55 |
from __future__ import annotations
|
56 |
|
57 |
from collections.abc import Sequence
|
58 |
+
from datetime import datetime
|
59 |
from pathlib import Path
|
60 |
from typing import Literal
|
61 |
|
62 |
import numpy as np
|
63 |
from prefect import task
|
64 |
+
from prefect.cache_policies import INPUTS, TASK_SOURCE
|
65 |
from prefect.runtime import task_run
|
|
|
66 |
from scipy.interpolate import interp1d
|
67 |
from scipy.linalg import schur
|
68 |
from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
|
|
|
200 |
@task(
|
201 |
name="MD",
|
202 |
task_run_name=_generate_task_run_name,
|
203 |
+
cache_policy=TASK_SOURCE + INPUTS
|
204 |
+
# cache_key_fn=task_input_hash,
|
205 |
# cache_expiration=timedelta(days=1)
|
206 |
)
|
207 |
def run(
|
mlip_arena/tasks/optimize.py
CHANGED
@@ -4,16 +4,15 @@ Define structure optimization tasks.
|
|
4 |
|
5 |
from __future__ import annotations
|
6 |
|
7 |
-
from datetime import timedelta
|
8 |
-
|
9 |
from prefect import task
|
|
|
10 |
from prefect.runtime import task_run
|
11 |
-
from prefect.tasks import task_input_hash
|
12 |
from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
|
13 |
|
14 |
from ase import Atoms
|
15 |
from ase.calculators.calculator import Calculator
|
16 |
from ase.calculators.mixing import SumCalculator
|
|
|
17 |
from ase.filters import * # type: ignore
|
18 |
from ase.filters import Filter
|
19 |
from ase.optimize import * # type: ignore
|
@@ -32,6 +31,7 @@ _valid_filters: dict[str, Filter] = {
|
|
32 |
_valid_optimizers: dict[str, Optimizer] = {
|
33 |
"MDMin": MDMin,
|
34 |
"FIRE": FIRE,
|
|
|
35 |
"LBFGS": LBFGS,
|
36 |
"LBFGSLineSearch": LBFGSLineSearch,
|
37 |
"BFGS": BFGS,
|
@@ -54,15 +54,16 @@ def _generate_task_run_name():
|
|
54 |
|
55 |
|
56 |
@task(
|
57 |
-
name="
|
58 |
task_run_name=_generate_task_run_name,
|
59 |
-
|
|
|
60 |
# cache_expiration=timedelta(days=1)
|
61 |
)
|
62 |
def run(
|
63 |
atoms: Atoms,
|
64 |
calculator_name: str | MLIPEnum,
|
65 |
-
calculator_kwargs: dict | None,
|
66 |
dispersion: str | None = None,
|
67 |
dispersion_kwargs: dict | None = None,
|
68 |
device: str | None = None,
|
@@ -71,7 +72,7 @@ def run(
|
|
71 |
filter: Filter | str | None = None,
|
72 |
filter_kwargs: dict | None = None,
|
73 |
criterion: dict | None = None,
|
74 |
-
|
75 |
):
|
76 |
device = device or str(get_freer_device())
|
77 |
|
@@ -119,6 +120,9 @@ def run(
|
|
119 |
optimizer_kwargs = optimizer_kwargs or {}
|
120 |
criterion = criterion or {}
|
121 |
|
|
|
|
|
|
|
122 |
if isinstance(filter, type) and issubclass(filter, Filter):
|
123 |
filter_instance = filter(atoms, **filter_kwargs)
|
124 |
print(f"Using filter: {filter_instance}")
|
@@ -131,7 +135,6 @@ def run(
|
|
131 |
elif filter is None:
|
132 |
optimizer_instance = optimizer(atoms, **optimizer_kwargs)
|
133 |
print(f"Using optimizer: {optimizer_instance}")
|
134 |
-
|
135 |
optimizer_instance.run(**criterion)
|
136 |
|
137 |
return {
|
|
|
4 |
|
5 |
from __future__ import annotations
|
6 |
|
|
|
|
|
7 |
from prefect import task
|
8 |
+
from prefect.cache_policies import INPUTS, TASK_SOURCE
|
9 |
from prefect.runtime import task_run
|
|
|
10 |
from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
|
11 |
|
12 |
from ase import Atoms
|
13 |
from ase.calculators.calculator import Calculator
|
14 |
from ase.calculators.mixing import SumCalculator
|
15 |
+
from ase.constraints import FixSymmetry
|
16 |
from ase.filters import * # type: ignore
|
17 |
from ase.filters import Filter
|
18 |
from ase.optimize import * # type: ignore
|
|
|
31 |
_valid_optimizers: dict[str, Optimizer] = {
|
32 |
"MDMin": MDMin,
|
33 |
"FIRE": FIRE,
|
34 |
+
"FIRE2": FIRE2,
|
35 |
"LBFGS": LBFGS,
|
36 |
"LBFGSLineSearch": LBFGSLineSearch,
|
37 |
"BFGS": BFGS,
|
|
|
54 |
|
55 |
|
56 |
@task(
|
57 |
+
name="OPT",
|
58 |
task_run_name=_generate_task_run_name,
|
59 |
+
cache_policy=TASK_SOURCE + INPUTS
|
60 |
+
# cache_key_fn=task_input_hash,
|
61 |
# cache_expiration=timedelta(days=1)
|
62 |
)
|
63 |
def run(
|
64 |
atoms: Atoms,
|
65 |
calculator_name: str | MLIPEnum,
|
66 |
+
calculator_kwargs: dict | None = None,
|
67 |
dispersion: str | None = None,
|
68 |
dispersion_kwargs: dict | None = None,
|
69 |
device: str | None = None,
|
|
|
72 |
filter: Filter | str | None = None,
|
73 |
filter_kwargs: dict | None = None,
|
74 |
criterion: dict | None = None,
|
75 |
+
symmetry: bool = False,
|
76 |
):
|
77 |
device = device or str(get_freer_device())
|
78 |
|
|
|
120 |
optimizer_kwargs = optimizer_kwargs or {}
|
121 |
criterion = criterion or {}
|
122 |
|
123 |
+
if symmetry:
|
124 |
+
atoms.set_constraint(FixSymmetry(atoms))
|
125 |
+
|
126 |
if isinstance(filter, type) and issubclass(filter, Filter):
|
127 |
filter_instance = filter(atoms, **filter_kwargs)
|
128 |
print(f"Using filter: {filter_instance}")
|
|
|
135 |
elif filter is None:
|
136 |
optimizer_instance = optimizer(atoms, **optimizer_kwargs)
|
137 |
print(f"Using optimizer: {optimizer_instance}")
|
|
|
138 |
optimizer_instance.run(**criterion)
|
139 |
|
140 |
return {
|
mlip_arena/tasks/registry.yaml
CHANGED
@@ -10,12 +10,12 @@ Thermal conductivity:
|
|
10 |
task-layout: centered
|
11 |
rank-page: thermal-conductivity
|
12 |
High pressure stability:
|
13 |
-
category: Molecular
|
14 |
task-page: stability
|
15 |
task-layout: centered
|
16 |
rank-page:
|
17 |
Combustion:
|
18 |
-
category: Molecular
|
19 |
task-page: combustion
|
20 |
task-layout: centered
|
21 |
rank-page: combustion
|
|
|
10 |
task-layout: centered
|
11 |
rank-page: thermal-conductivity
|
12 |
High pressure stability:
|
13 |
+
category: Molecular Dynamics
|
14 |
task-page: stability
|
15 |
task-layout: centered
|
16 |
rank-page:
|
17 |
Combustion:
|
18 |
+
category: Molecular Dynamics
|
19 |
task-page: combustion
|
20 |
task-layout: centered
|
21 |
rank-page: combustion
|
pyproject.toml
CHANGED
@@ -42,6 +42,7 @@ run = [
|
|
42 |
"prefect-dask",
|
43 |
"dask",
|
44 |
"dask_jobqueue",
|
|
|
45 |
]
|
46 |
app = [
|
47 |
"streamlit==1.38.0",
|
|
|
42 |
"prefect-dask",
|
43 |
"dask",
|
44 |
"dask_jobqueue",
|
45 |
+
"tables",
|
46 |
]
|
47 |
app = [
|
48 |
"streamlit==1.38.0",
|
tests/test_eos.py
CHANGED
@@ -27,6 +27,7 @@ def single_eos_flow(calculator_name):
|
|
27 |
),
|
28 |
max_abs_strain=0.1,
|
29 |
npoints=6,
|
|
|
30 |
)
|
31 |
|
32 |
|
|
|
27 |
),
|
28 |
max_abs_strain=0.1,
|
29 |
npoints=6,
|
30 |
+
concurrent=True
|
31 |
)
|
32 |
|
33 |
|