Yuan (Cyrus) Chiang committed on
Commit
1d1ee87
·
unverified ·
1 Parent(s): 08a88d8

High-throughput EOS flow on alloy systems (#30)

Browse files

* refactor eos into task

* task name and cache

* avoid duplicate test on PR branches

* more returned data in eos

* return b1

* minor md test change

* add eos-alloy input file

* add symmetry

* support disabling concurrency

* optimize eos, md, opt tasks to handle exceptions; first successful HT eos-alloy flow

* change cache policy

* dictionary items

* add pytable dependency; add FIRE2 optimizer

* fixed hdf5 output hook

* rename task folder

* add example notebook

* increase method string length for hd5 store

* use file path

* implement safe hdf store to avoid inconsistencies from multiple processes

* add references in notebook and flow

* frechet filter in eos flow; add ternary plot

mlip_arena/data/__init__.py ADDED
File without changes
mlip_arena/data/local.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+
4
+ from pandas import HDFStore
5
+
6
+ # https://stackoverflow.com/questions/22522551/pandas-hdf5-as-a-database/29014295#29014295
7
+
8
+
9
class SafeHDFStore(HDFStore):
    """HDFStore guarded by an exclusive ``<path>.lock`` file.

    Before the store is opened, ``<path>.lock`` is created with
    ``O_CREAT | O_EXCL``. While that file already exists the constructor
    sleeps ``probe_interval`` seconds and retries, without any timeout.
    The lock is released when the store is left through ``__exit__``, so the
    class is meant to be used as a context manager::

        with SafeHDFStore("eos.h5", mode="a") as store:
            store.append(...)

    Adapted from
    https://stackoverflow.com/questions/22522551/pandas-hdf5-as-a-database/29014295#29014295

    Args:
        *args: Positional arguments forwarded to ``HDFStore``; the first one,
            if present, is the store path.
        probe_interval: Seconds to wait between attempts to take the lock
            (keyword only, default 1). It is not forwarded to ``HDFStore``.
        **kwargs: Remaining keyword arguments forwarded to ``HDFStore``
            (the path may be given here as ``path=``).

    Raises:
        TypeError: If no path is given positionally or as ``path=``.
    """

    def __init__(self, *args, **kwargs):
        probe_interval = kwargs.pop("probe_interval", 1)

        # The path may arrive positionally or as a keyword argument.
        if args:
            path = args[0]
        elif "path" in kwargs:
            path = kwargs["path"]
        else:
            raise TypeError("SafeHDFStore requires the store path")

        self._lock = f"{os.fspath(path)}.lock"
        while True:
            try:
                # O_EXCL makes creation fail if the lock file already exists.
                self._flock = os.open(
                    self._lock, os.O_CREAT | os.O_EXCL | os.O_WRONLY
                )
                break
            except FileExistsError:
                time.sleep(probe_interval)

        try:
            HDFStore.__init__(self, *args, **kwargs)
        except BaseException:
            # Without this, a failed open would leave the lock file behind
            # and every later opener would wait on it indefinitely.
            self._release_lock()
            raise

    def _release_lock(self):
        """Close the lock descriptor and delete the lock file (idempotent)."""
        flock = self._flock
        if flock is None:
            return
        self._flock = None
        try:
            os.close(flock)
        finally:
            try:
                os.remove(self._lock)
            except FileNotFoundError:
                # The lock file was already removed; nothing left to release.
                pass

    def __exit__(self, *args, **kwargs):
        try:
            HDFStore.__exit__(self, *args, **kwargs)
        finally:
            # Release the lock even when closing the store raises.
            self._release_lock()
mlip_arena/models/__init__.py CHANGED
@@ -22,7 +22,7 @@ for model, metadata in REGISTRY.items():
22
  try:
23
  module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
24
  MLIPMap[model] = getattr(module, metadata["class"])
25
- except (ModuleNotFoundError, AttributeError) as e:
26
  print(e)
27
  continue
28
 
 
22
  try:
23
  module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
24
  MLIPMap[model] = getattr(module, metadata["class"])
25
+ except (ModuleNotFoundError, AttributeError, ValueError) as e:
26
  print(e)
27
  continue
28
 
mlip_arena/models/registry.yaml CHANGED
@@ -10,6 +10,7 @@ MACE-MP(M):
10
  datasets:
11
  - MPTrj # TODO: fake HF dataset repo
12
  cpu-tasks:
 
13
  gpu-tasks:
14
  - homonuclear-diatomics
15
  - stability
@@ -79,6 +80,8 @@ MatterSim:
79
  datasets:
80
  - MPTrj
81
  - Alexandria
 
 
82
  gpu-tasks:
83
  - homonuclear-diatomics
84
  github: https://github.com/microsoft/mattersim
@@ -101,7 +104,6 @@ ORB:
101
  datasets:
102
  - MPTrj # TODO: fake HF dataset repo
103
  - Alexandria
104
- cpu-tasks:
105
  gpu-tasks:
106
  - homonuclear-diatomics
107
  - combustion
@@ -125,7 +127,6 @@ SevenNet:
125
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
126
  datasets:
127
  - MPTrj # TODO: fake HF dataset repo
128
- cpu-tasks:
129
  gpu-tasks:
130
  - homonuclear-diatomics
131
  - stability
@@ -151,6 +152,8 @@ eqV2(OMat):
151
  - OMat
152
  - MPTrj
153
  - Alexandria
 
 
154
  gpu-tasks:
155
  - homonuclear-diatomics
156
  prediction: EFS
@@ -238,7 +241,6 @@ MACE-OFF(M):
238
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
239
  datasets:
240
  - SPICE # TODO: fake HF dataset repo
241
- cpu-tasks:
242
  gpu-tasks:
243
  - homonuclear-diatomics
244
  github: https://github.com/ACEsuit/mace
@@ -302,6 +304,8 @@ ORBv2:
302
  datasets:
303
  - MPTrj
304
  - Alexandria
 
 
305
  gpu-tasks:
306
  - homonuclear-diatomics
307
  github: https://github.com/orbital-materials/orb-models
 
10
  datasets:
11
  - MPTrj # TODO: fake HF dataset repo
12
  cpu-tasks:
13
+ - eos_alloy
14
  gpu-tasks:
15
  - homonuclear-diatomics
16
  - stability
 
80
  datasets:
81
  - MPTrj
82
  - Alexandria
83
+ cpu-tasks:
84
+ - eos_alloy
85
  gpu-tasks:
86
  - homonuclear-diatomics
87
  github: https://github.com/microsoft/mattersim
 
104
  datasets:
105
  - MPTrj # TODO: fake HF dataset repo
106
  - Alexandria
 
107
  gpu-tasks:
108
  - homonuclear-diatomics
109
  - combustion
 
127
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
128
  datasets:
129
  - MPTrj # TODO: fake HF dataset repo
 
130
  gpu-tasks:
131
  - homonuclear-diatomics
132
  - stability
 
152
  - OMat
153
  - MPTrj
154
  - Alexandria
155
+ cpu-tasks:
156
+ - eos_alloy
157
  gpu-tasks:
158
  - homonuclear-diatomics
159
  prediction: EFS
 
241
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
242
  datasets:
243
  - SPICE # TODO: fake HF dataset repo
 
244
  gpu-tasks:
245
  - homonuclear-diatomics
246
  github: https://github.com/ACEsuit/mace
 
304
  datasets:
305
  - MPTrj
306
  - Alexandria
307
+ cpu-tasks:
308
+ - eos_alloy
309
  gpu-tasks:
310
  - homonuclear-diatomics
311
  github: https://github.com/orbital-materials/orb-models
mlip_arena/tasks/diatomics/run.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
mlip_arena/tasks/eos.py CHANGED
@@ -6,13 +6,14 @@ https://github.com/materialsvirtuallab/matcalc/blob/main/matcalc/eos.py
6
 
7
  from __future__ import annotations
8
 
9
- from typing import TYPE_CHECKING
10
 
11
  import numpy as np
12
  from prefect import task
 
13
  from prefect.futures import wait
14
  from prefect.runtime import task_run
15
- from prefect.tasks import task_input_hash
16
 
17
  from ase import Atoms
18
  from ase.filters import * # type: ignore
@@ -39,21 +40,23 @@ def _generate_task_run_name():
39
  @task(
40
  name="EOS",
41
  task_run_name=_generate_task_run_name,
42
- cache_key_fn=task_input_hash,
 
43
  )
44
  def run(
45
  atoms: Atoms,
46
  calculator_name: str | MLIPEnum,
47
- calculator_kwargs: dict | None,
48
  device: str | None = None,
49
  optimizer: Optimizer | str = "BFGSLineSearch", # type: ignore
50
  optimizer_kwargs: dict | None = None,
51
- filter: Filter | str | None = None,
52
  filter_kwargs: dict | None = None,
53
  criterion: dict | None = None,
54
  max_abs_strain: float = 0.1,
55
  npoints: int = 11,
56
- ):
 
57
  """
58
  Compute the equation of state (EOS) for the given atoms and calculator.
59
 
@@ -69,11 +72,12 @@ def run(
69
  criterion: The criterion to use.
70
  max_abs_strain: The maximum absolute strain to use.
71
  npoints: The number of points to sample.
 
72
 
73
  Returns:
74
- A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy.
75
  """
76
- first_relax = OPT(
77
  atoms=atoms,
78
  calculator_name=calculator_name,
79
  calculator_kwargs=calculator_kwargs,
@@ -83,8 +87,14 @@ def run(
83
  filter=filter,
84
  filter_kwargs=filter_kwargs,
85
  criterion=criterion,
 
86
  )
87
 
 
 
 
 
 
88
  relaxed = first_relax["atoms"]
89
 
90
  # p0 = relaxed.get_positions()
@@ -92,37 +102,57 @@ def run(
92
 
93
  factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
94
 
95
- futures = []
96
- for f in factors:
97
- atoms = relaxed.copy()
98
- atoms.set_cell(c0 * f, scale_atoms=True)
99
-
100
- future = OPT.submit(
101
- atoms=atoms,
102
- calculator_name=calculator_name,
103
- calculator_kwargs=calculator_kwargs,
104
- device=device,
105
- optimizer=optimizer,
106
- optimizer_kwargs=optimizer_kwargs,
107
- filter=None,
108
- filter_kwargs=None,
109
- criterion=criterion,
110
- )
111
-
112
- futures.append(future)
113
-
114
- wait(futures)
115
-
116
- volumes = [
117
- f.result()["atoms"].get_volume()
118
- for f in futures
119
- if isinstance(f.result(), dict)
120
- ]
121
- energies = [
122
- f.result()["atoms"].get_potential_energy()
123
- for f in futures
124
- if isinstance(f.result(), dict)
125
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  volumes, energies = map(
128
  list,
@@ -136,6 +166,8 @@ def run(
136
  bm.fit()
137
 
138
  return {
 
 
139
  "eos": {"volumes": volumes, "energies": energies},
140
  "K": bm.b0_GPa,
141
  "b0": bm.b0,
 
6
 
7
  from __future__ import annotations
8
 
9
+ from typing import TYPE_CHECKING, Any
10
 
11
  import numpy as np
12
  from prefect import task
13
+ from prefect.cache_policies import INPUTS, TASK_SOURCE
14
  from prefect.futures import wait
15
  from prefect.runtime import task_run
16
+ from prefect.states import State
17
 
18
  from ase import Atoms
19
  from ase.filters import * # type: ignore
 
40
  @task(
41
  name="EOS",
42
  task_run_name=_generate_task_run_name,
43
+ cache_policy=TASK_SOURCE + INPUTS
44
+ # cache_key_fn=task_input_hash,
45
  )
46
  def run(
47
  atoms: Atoms,
48
  calculator_name: str | MLIPEnum,
49
+ calculator_kwargs: dict | None = None,
50
  device: str | None = None,
51
  optimizer: Optimizer | str = "BFGSLineSearch", # type: ignore
52
  optimizer_kwargs: dict | None = None,
53
+ filter: Filter | str | None = "FrechetCell", # type: ignore
54
  filter_kwargs: dict | None = None,
55
  criterion: dict | None = None,
56
  max_abs_strain: float = 0.1,
57
  npoints: int = 11,
58
+ concurrent: bool = True,
59
+ ) -> dict[str, Any] | State:
60
  """
61
  Compute the equation of state (EOS) for the given atoms and calculator.
62
 
 
72
  criterion: The criterion to use.
73
  max_abs_strain: The maximum absolute strain to use.
74
  npoints: The number of points to sample.
75
+ concurrent: Whether to relax multiple structures concurrently.
76
 
77
  Returns:
78
+ A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy if successful. Otherwise, a prefect state object.
79
  """
80
+ state = OPT(
81
  atoms=atoms,
82
  calculator_name=calculator_name,
83
  calculator_kwargs=calculator_kwargs,
 
87
  filter=filter,
88
  filter_kwargs=filter_kwargs,
89
  criterion=criterion,
90
+ return_state=True,
91
  )
92
 
93
+ if state.is_failed():
94
+ return state
95
+
96
+ first_relax = state.result(raise_on_failure=False)
97
+ assert isinstance(first_relax, dict)
98
  relaxed = first_relax["atoms"]
99
 
100
  # p0 = relaxed.get_positions()
 
102
 
103
  factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
104
 
105
+ if concurrent:
106
+ futures = []
107
+ for f in factors:
108
+ atoms = relaxed.copy()
109
+ atoms.set_cell(c0 * f, scale_atoms=True)
110
+
111
+ future = OPT.submit(
112
+ atoms=atoms,
113
+ calculator_name=calculator_name,
114
+ calculator_kwargs=calculator_kwargs,
115
+ device=device,
116
+ optimizer=optimizer,
117
+ optimizer_kwargs=optimizer_kwargs,
118
+ filter=None,
119
+ filter_kwargs=None,
120
+ criterion=criterion,
121
+ )
122
+ futures.append(future)
123
+
124
+ wait(futures)
125
+
126
+ results = [
127
+ f.result(raise_on_failure=False)
128
+ for f in futures
129
+ if future.state.is_completed()
130
+ ]
131
+ else:
132
+ states = []
133
+ for f in factors:
134
+ atoms = relaxed.copy()
135
+ atoms.set_cell(c0 * f, scale_atoms=True)
136
+
137
+ state = OPT(
138
+ atoms=atoms,
139
+ calculator_name=calculator_name,
140
+ calculator_kwargs=calculator_kwargs,
141
+ device=device,
142
+ optimizer=optimizer,
143
+ optimizer_kwargs=optimizer_kwargs,
144
+ filter=None,
145
+ filter_kwargs=None,
146
+ criterion=criterion,
147
+ return_state=True,
148
+ )
149
+ states.append(state)
150
+ results = [
151
+ s.result(raise_on_failure=False) for s in states if state.is_completed()
152
+ ]
153
+
154
+ volumes = [f["atoms"].get_volume() for f in results]
155
+ energies = [f["atoms"].get_potential_energy() for f in results]
156
 
157
  volumes, energies = map(
158
  list,
 
166
  bm.fit()
167
 
168
  return {
169
+ "atoms": relaxed,
170
+ "calculator_name": calculator_name,
171
  "eos": {"volumes": volumes, "energies": energies},
172
  "K": bm.b0_GPa,
173
  "b0": bm.b0,
mlip_arena/tasks/eos_alloy/__init__.py ADDED
File without changes
mlip_arena/tasks/eos_alloy/flow.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ from pathlib import Path
3
+ import json
4
+
5
+ import pandas as pd
6
+ from dask.distributed import Client
7
+ from dask_jobqueue import SLURMCluster
8
+ from huggingface_hub import hf_hub_download
9
+ from prefect import Task, flow, task
10
+ from prefect.client.schemas.objects import TaskRun
11
+ from prefect.futures import wait
12
+ from prefect.states import State
13
+ from prefect_dask import DaskTaskRunner
14
+
15
+ from ase.db import connect
16
+ from mlip_arena.data.local import SafeHDFStore
17
+ from mlip_arena.models import REGISTRY, MLIPEnum
18
+ from mlip_arena.tasks.eos import run as EOS
19
+
20
+
21
@task
def get_atoms_from_db(db_path: Path | str):
    """
    Yield every row of an ASE database as an atoms object.

    If ``db_path`` does not exist locally, a file of that name is downloaded
    from the ``atomind/mlip-arena`` dataset repository on the Hugging Face
    Hub, from the subfolder named after this file's parent directory.

    Args:
        db_path: Path of the database file, or its file name in the hub
            subfolder.

    Yields:
        The result of ``row.toatoms()`` for each row selected from the
        database.
    """
    local_db = Path(db_path)
    if local_db.exists():
        source = local_db
    else:
        source = hf_hub_download(
            repo_id="atomind/mlip-arena",
            repo_type="dataset",
            subfolder=f"{Path(__file__).parent.name}",
            filename=str(local_db),
        )

    with connect(source) as db:
        yield from (row.toatoms() for row in db.select())
34
+
35
+
36
def save_to_hdf(
    tsk: Task, run: TaskRun, state: State, fpath: Path | str, table_name: str
):
    """
    Define a hook on completion of EOS task to save results to HDF5 file.

    For a completed run, the EOS result is flattened into one row per
    (volume, energy) point. That table is written to
    ``<family>/<calculator>_<formula>.json`` next to this file and appended
    to the HDF5 store ``<fpath stem>_<calculator><suffix>`` under
    ``table_name``. Runs that are not completed are ignored.

    Args:
        tsk: The task the hook is attached to (unused).
        run: The task run whose state and result are read.
        state: The state passed by the hook machinery (unused; ``run.state``
            is read instead).
        fpath: Base path of the HDF5 file; the calculator name is appended to
            its stem.
        table_name: Key of the table inside the HDF5 store.
    """

    if not run.state.is_completed():
        return

    result = run.state.result(raise_on_failure=False)

    atoms = result["atoms"]
    # NOTE(review): falls back to the name stored in the result when the
    # task-run input entry is falsy -- confirm what `task_inputs` holds here.
    calculator_name = (
        run.task_inputs["calculator_name"] or result["calculator_name"]
    )

    energies = [float(e) for e in result["eos"]["energies"]]

    formula = atoms.get_chemical_formula()

    df = pd.DataFrame(
        {
            "method": calculator_name,
            "formula": formula,
            "total_run_time": run.total_run_time,
            "v0": result["v0"],
            "e0": result["e0"],
            "b0": result["b0"],
            "b1": result["b1"],
            "volume": result["eos"]["volumes"],
            "energy": energies,
        }
    )

    fpath = Path(fpath)
    fpath = fpath.with_stem(fpath.stem + f"_{calculator_name}")

    # The family folder lives next to this module. `Path(__file__)` alone is
    # the source file itself, and a directory cannot be created beneath it.
    family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
    family_path.mkdir(parents=True, exist_ok=True)

    # `result` carries the atoms object under "atoms", which the json module
    # cannot encode; the flattened table holds the same fit data in
    # JSON-friendly form.
    df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)

    # SafeHDFStore serializes writers through a lock file, so hooks running
    # in several processes do not interleave appends.
    with SafeHDFStore(fpath, mode="a") as store:
        store.append(
            table_name,
            df,
            format="table",
            data_columns=True,
            min_itemsize={"formula": 50, "method": 20},
        )
86
+
87
+
88
@flow
def run_from_db(
    db_path: Path | str,
    out_path: Path | str,
    table_name: str,
    optimizer="FIRE",
    optimizer_kwargs=None,
    filter="FrechetCell",
    filter_kwargs=None,
    criterion=dict(fmax=0.1, steps=1000),
    max_abs_strain=0.20,
    concurrent=False,
):
    """
    Submit an EOS task for every structure in a database and every eligible model.

    A model is eligible when its registry entry has a truthy ``npt`` value and
    lists this task (the name of this file's parent directory) under
    ``cpu-tasks`` or ``gpu-tasks``. Each EOS task gets an ``on_completion``
    hook that stores its result through ``save_to_hdf``.

    Args:
        db_path: Database handed to ``get_atoms_from_db``.
        out_path: Base path of the HDF5 output passed to the hook.
        table_name: Table key passed to the hook.
        optimizer: Optimizer forwarded to the EOS task.
        optimizer_kwargs: Optimizer keyword arguments forwarded to the EOS task.
        filter: Cell filter forwarded to the EOS task.
        filter_kwargs: Filter keyword arguments forwarded to the EOS task.
        criterion: Convergence criterion forwarded to the EOS task.
        max_abs_strain: Maximum absolute strain forwarded to the EOS task.
        concurrent: Forwarded to the EOS task's ``concurrent`` argument.

    Returns:
        Results of the submitted EOS tasks whose state is completed.
    """
    task_name = Path(__file__).parent.name
    hdf_hook = partial(save_to_hdf, fpath=out_path, table_name=table_name)
    eos_task = EOS.with_options(on_completion=[hdf_hook])

    submitted = []
    for atoms in get_atoms_from_db(db_path):
        for mlip in MLIPEnum:
            if not REGISTRY[mlip.name]["npt"]:
                continue

            supported_tasks = REGISTRY[mlip.name].get(
                "cpu-tasks", []
            ) + REGISTRY[mlip.name].get("gpu-tasks", [])
            if task_name not in supported_tasks:
                continue

            submitted.append(
                eos_task.submit(
                    atoms=atoms,
                    calculator_name=mlip.name,
                    calculator_kwargs={},
                    optimizer=optimizer,
                    optimizer_kwargs=optimizer_kwargs,
                    filter=filter,
                    filter_kwargs=filter_kwargs,
                    criterion=criterion,
                    max_abs_strain=max_abs_strain,
                    concurrent=concurrent,
                )
            )

    wait(submitted)

    completed = []
    for fut in submitted:
        if fut.state.is_completed():
            completed.append(fut.result(timeout=None, raise_on_failure=False))
    return completed
136
+
137
+
138
if __name__ == "__main__":
    # Resources requested per SLURM allocation.
    nodes_per_alloc = 1
    gpus_per_alloc = 4
    ntasks = 1

    # Shell lines placed in the job script prologue.
    prologue = [
        "source ~/.bashrc",
        "module load python",
        "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
    ]

    # Extra scheduler directives; "-J" replaces the job-name directive that
    # is skipped below.
    extra_directives = [
        "-J eos",
        "-q debug",
        f"-N {nodes_per_alloc}",
        "-C gpu",
        f"-G {gpus_per_alloc}",
    ]

    cluster_kwargs = {
        "cores": 1,
        "memory": "64 GB",
        "shebang": "#!/bin/bash",
        "account": "m3828",
        "walltime": "00:30:00",
        "job_mem": "0",
        "job_script_prologue": prologue,
        "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
        "job_extra_directives": extra_directives,
    }

    cluster = SLURMCluster(**cluster_kwargs)
    # Show the generated submission script before scaling.
    print(cluster.job_script())
    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
    client = Client(cluster)

    # Run the flow's tasks on the Dask scheduler started above.
    dask_runner = DaskTaskRunner(address=client.scheduler.address)
    run_from_db_ = run_from_db.with_options(
        task_runner=dask_runner,
        log_prints=True,
    )

    results = run_from_db_(
        db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
    )
mlip_arena/tasks/eos_alloy/input.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generates a database of special quasi-random structures (SQS) from a template structure.
3
+
4
+ This script utilizes the `structuretoolkit <https://github.com/pyiron/structuretoolkit/tree/main>`_
5
+ to call `sqsgenerator <https://sqsgenerator.readthedocs.io/en/latest/index.html#>`_ to generate
6
+ SQS structures. The generated structures are saved to an ASE database file and optionally uploaded
7
+ to the Hugging Face Hub.
8
+
9
+ References
10
+ ~~~~~~~~~~
11
+ - Alvi, S. M. A. A., Janssen, J., Khatamsaz, D., Perez, D., Allaire, D., & Arroyave, R. (2024).
12
+ Hierarchical Gaussian Process-Based Bayesian Optimization for Materials Discovery in High
13
+ Entropy Alloy Spaces. *arXiv preprint arXiv:2410.04314*.
14
+ - Gehringer, D., Friák, M., & Holec, D. (2023). Models of configurationally-complex alloys made
15
+ simple. *Computer Physics Communications, 286*, 108664.
16
+
17
+ Authors
18
+ ~~~~~~~
19
+ - Jan Janssen (`@jan-janssen <https://github.com/jan-janssen>`_)
20
+ - Yuan Chiang (`@chiang-yuan <https://github.com/chiang-yuan>`_)
21
+ """
22
+
23
+ import os
24
+ from pathlib import Path
25
+
26
+ import numpy as np
27
+ from dotenv import load_dotenv
28
+ from huggingface_hub import HfApi
29
+ from tqdm.auto import tqdm
30
+
31
+ from ase import Atoms
32
+ from ase.build import bulk
33
+ from ase.db import connect
34
+
35
+
36
def body_order(n=32, b=5):
    """
    Generate all possible combinations of atomic counts for `b` species
    that sum to `n`.

    Args:
        n: Total number of atoms to distribute.
        b: Number of species (must be at least 1).

    Returns:
        A list of lists, each of length `b`, holding non-negative counts that
        sum to `n`. Entries are ordered with the first count increasing
        slowest. An empty list is returned when `n` is negative.

    Raises:
        ValueError: If `b` is smaller than 1.
    """
    if b < 1:
        raise ValueError(f"b must be at least 1, got {b}")
    if n < 0:
        return []
    if b == 1:
        # A single species has to take all atoms; this also ends the recursion.
        return [[n]]
    return [
        [first] + rest
        for first in range(n + 1)
        for rest in body_order(n=n - first, b=b - 1)
    ]
44
+
45
+
46
def generate_sqs(structure_template, elements, counts):
    """
    Build a special quasi-random structure (SQS) for the given composition.

    Each count is divided by the number of sites in ``structure_template`` to
    obtain a mole fraction, and the first structure returned by
    ``structuretoolkit``'s ``sqs_structures`` is handed back.

    Args:
        structure_template: Structure whose sites are to be decorated.
        elements: Element symbols, paired position by position with ``counts``.
        counts: Number of atoms of each element.
    """
    import structuretoolkit as stk

    num_sites = len(structure_template)
    mole_fractions = {}
    for element, count in zip(elements, counts):
        mole_fractions[element] = count / num_sites

    candidates = stk.build.sqs_structures(
        structure=structure_template,
        mole_fractions=mole_fractions,
    )
    return candidates[0]
59
+
60
+
61
def get_endmember(structure, conc_lst, elements):
    """
    Assign a single element to all atoms in the structure to create an endmember.

    The element chosen is the first entry of ``elements`` whose matching
    entry in ``conc_lst`` is non-zero. ``structure`` is modified in place and
    also returned.

    Args:
        structure: Object whose ``symbols`` support slice assignment.
        conc_lst: Per-element counts, as a list or array aligned with
            ``elements``.
        elements: Element symbols.

    Raises:
        IndexError: If every entry of ``conc_lst`` is zero.
    """
    # Coerce to an array so the comparison is element-wise. With a plain list,
    # `conc_lst != 0` is the scalar True and would select the whole array.
    present = np.asarray(conc_lst) != 0
    structure.symbols[:] = np.asarray(elements)[present][0]
    return structure
67
+
68
+
69
def generate_alloy_db(
    structure_template: Atoms,
    elements: list[str],
    local_path: Path | None = None,
    upload: bool = True,
    repo_id: str = "atomind/mlip-arena",
) -> Path:
    """
    Write one structure per composition of ``elements`` to an ASE database.

    Every way of distributing the template's atoms over the elements is
    enumerated with ``body_order``. Compositions with exactly one element
    present are written as endmembers; all others are generated with
    ``generate_sqs``. An existing database at the target path is deleted first.

    Args:
        structure_template: Structure whose sites are decorated.
        elements: Element symbols to distribute over the sites.
        local_path: Database path; defaults to ``sqs_<elements>.db`` next to
            this file.
        upload: Whether to upload the database to the Hugging Face Hub.
        repo_id: Dataset repository that receives the upload.

    Returns:
        Path of the database file.

    Raises:
        ValueError: If ``upload`` is true and ``HF_TOKEN`` is not set.
    """
    # The Hugging Face token comes from the environment (optionally via .env).
    load_dotenv()
    hf_token = os.getenv("HF_TOKEN", None)
    if upload and hf_token is None:
        raise ValueError("HF_TOKEN environment variable not set.")

    # All count vectors (one entry per element) that sum to the number of atoms.
    configurations = np.array(
        body_order(n=len(structure_template), b=len(elements))
    )

    default_path = Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
    db_path = local_path or default_path
    # Start from a fresh database file.
    db_path.unlink(missing_ok=True)

    species = np.array(elements)
    with connect(db_path) as db:
        for composition in tqdm(configurations, total=len(configurations)):
            present = composition != 0
            if present.sum() == 1:
                # Only one element present: decorate a copy of the template.
                atoms = get_endmember(
                    structure=structure_template.copy(),
                    conc_lst=composition,
                    elements=elements,
                )
            else:
                atoms = generate_sqs(
                    structure_template=structure_template,
                    elements=species[present],
                    counts=composition[present],
                )
            db.write(atoms)

    if upload:
        # Stored in the hub subfolder named after this file's parent directory.
        api = HfApi(token=hf_token)
        api.upload_file(
            path_or_fileobj=db_path,
            path_in_repo=f"{Path(__file__).parent.name}/{db_path.name}",
            repo_id=repo_id,
            repo_type="dataset",
        )
        print(f"Database uploaded: {db_path}")

    return db_path
127
+
128
+
129
if __name__ == "__main__":
    # Template: 2x2x2 repeat of the cubic cell built for "Al" with a=3.6.
    unit_cell = bulk("Al", a=3.6, cubic=True)
    structure_template = unit_cell.repeat([2, 2, 2])

    # Species distributed over the template sites.
    elements = ["Fe", "Ni", "Cr"]

    generate_alloy_db(structure_template, elements, upload=True)
mlip_arena/tasks/eos_alloy/run.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
mlip_arena/tasks/md.py CHANGED
@@ -55,14 +55,14 @@ works thereof, in binary and source code form.
55
  from __future__ import annotations
56
 
57
  from collections.abc import Sequence
58
- from datetime import datetime, timedelta
59
  from pathlib import Path
60
  from typing import Literal
61
 
62
  import numpy as np
63
  from prefect import task
 
64
  from prefect.runtime import task_run
65
- from prefect.tasks import task_input_hash
66
  from scipy.interpolate import interp1d
67
  from scipy.linalg import schur
68
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
@@ -200,7 +200,8 @@ def _generate_task_run_name():
200
  @task(
201
  name="MD",
202
  task_run_name=_generate_task_run_name,
203
- cache_key_fn=task_input_hash,
 
204
  # cache_expiration=timedelta(days=1)
205
  )
206
  def run(
 
55
  from __future__ import annotations
56
 
57
  from collections.abc import Sequence
58
+ from datetime import datetime
59
  from pathlib import Path
60
  from typing import Literal
61
 
62
  import numpy as np
63
  from prefect import task
64
+ from prefect.cache_policies import INPUTS, TASK_SOURCE
65
  from prefect.runtime import task_run
 
66
  from scipy.interpolate import interp1d
67
  from scipy.linalg import schur
68
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 
200
  @task(
201
  name="MD",
202
  task_run_name=_generate_task_run_name,
203
+ cache_policy=TASK_SOURCE + INPUTS
204
+ # cache_key_fn=task_input_hash,
205
  # cache_expiration=timedelta(days=1)
206
  )
207
  def run(
mlip_arena/tasks/optimize.py CHANGED
@@ -4,16 +4,15 @@ Define structure optimization tasks.
4
 
5
  from __future__ import annotations
6
 
7
- from datetime import timedelta
8
-
9
  from prefect import task
 
10
  from prefect.runtime import task_run
11
- from prefect.tasks import task_input_hash
12
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
13
 
14
  from ase import Atoms
15
  from ase.calculators.calculator import Calculator
16
  from ase.calculators.mixing import SumCalculator
 
17
  from ase.filters import * # type: ignore
18
  from ase.filters import Filter
19
  from ase.optimize import * # type: ignore
@@ -32,6 +31,7 @@ _valid_filters: dict[str, Filter] = {
32
  _valid_optimizers: dict[str, Optimizer] = {
33
  "MDMin": MDMin,
34
  "FIRE": FIRE,
 
35
  "LBFGS": LBFGS,
36
  "LBFGSLineSearch": LBFGSLineSearch,
37
  "BFGS": BFGS,
@@ -54,15 +54,16 @@ def _generate_task_run_name():
54
 
55
 
56
  @task(
57
- name="MD",
58
  task_run_name=_generate_task_run_name,
59
- cache_key_fn=task_input_hash,
 
60
  # cache_expiration=timedelta(days=1)
61
  )
62
  def run(
63
  atoms: Atoms,
64
  calculator_name: str | MLIPEnum,
65
- calculator_kwargs: dict | None,
66
  dispersion: str | None = None,
67
  dispersion_kwargs: dict | None = None,
68
  device: str | None = None,
@@ -71,7 +72,7 @@ def run(
71
  filter: Filter | str | None = None,
72
  filter_kwargs: dict | None = None,
73
  criterion: dict | None = None,
74
- # TODO: fix symmetry
75
  ):
76
  device = device or str(get_freer_device())
77
 
@@ -119,6 +120,9 @@ def run(
119
  optimizer_kwargs = optimizer_kwargs or {}
120
  criterion = criterion or {}
121
 
 
 
 
122
  if isinstance(filter, type) and issubclass(filter, Filter):
123
  filter_instance = filter(atoms, **filter_kwargs)
124
  print(f"Using filter: {filter_instance}")
@@ -131,7 +135,6 @@ def run(
131
  elif filter is None:
132
  optimizer_instance = optimizer(atoms, **optimizer_kwargs)
133
  print(f"Using optimizer: {optimizer_instance}")
134
-
135
  optimizer_instance.run(**criterion)
136
 
137
  return {
 
4
 
5
  from __future__ import annotations
6
 
 
 
7
  from prefect import task
8
+ from prefect.cache_policies import INPUTS, TASK_SOURCE
9
  from prefect.runtime import task_run
 
10
  from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
11
 
12
  from ase import Atoms
13
  from ase.calculators.calculator import Calculator
14
  from ase.calculators.mixing import SumCalculator
15
+ from ase.constraints import FixSymmetry
16
  from ase.filters import * # type: ignore
17
  from ase.filters import Filter
18
  from ase.optimize import * # type: ignore
 
31
  _valid_optimizers: dict[str, Optimizer] = {
32
  "MDMin": MDMin,
33
  "FIRE": FIRE,
34
+ "FIRE2": FIRE2,
35
  "LBFGS": LBFGS,
36
  "LBFGSLineSearch": LBFGSLineSearch,
37
  "BFGS": BFGS,
 
54
 
55
 
56
  @task(
57
+ name="OPT",
58
  task_run_name=_generate_task_run_name,
59
+ cache_policy=TASK_SOURCE + INPUTS
60
+ # cache_key_fn=task_input_hash,
61
  # cache_expiration=timedelta(days=1)
62
  )
63
  def run(
64
  atoms: Atoms,
65
  calculator_name: str | MLIPEnum,
66
+ calculator_kwargs: dict | None = None,
67
  dispersion: str | None = None,
68
  dispersion_kwargs: dict | None = None,
69
  device: str | None = None,
 
72
  filter: Filter | str | None = None,
73
  filter_kwargs: dict | None = None,
74
  criterion: dict | None = None,
75
+ symmetry: bool = False,
76
  ):
77
  device = device or str(get_freer_device())
78
 
 
120
  optimizer_kwargs = optimizer_kwargs or {}
121
  criterion = criterion or {}
122
 
123
+ if symmetry:
124
+ atoms.set_constraint(FixSymmetry(atoms))
125
+
126
  if isinstance(filter, type) and issubclass(filter, Filter):
127
  filter_instance = filter(atoms, **filter_kwargs)
128
  print(f"Using filter: {filter_instance}")
 
135
  elif filter is None:
136
  optimizer_instance = optimizer(atoms, **optimizer_kwargs)
137
  print(f"Using optimizer: {optimizer_instance}")
 
138
  optimizer_instance.run(**criterion)
139
 
140
  return {
mlip_arena/tasks/registry.yaml CHANGED
@@ -10,12 +10,12 @@ Thermal conductivity:
10
  task-layout: centered
11
  rank-page: thermal-conductivity
12
  High pressure stability:
13
- category: Molecular dynamics
14
  task-page: stability
15
  task-layout: centered
16
  rank-page:
17
  Combustion:
18
- category: Molecular dynamics
19
  task-page: combustion
20
  task-layout: centered
21
  rank-page: combustion
 
10
  task-layout: centered
11
  rank-page: thermal-conductivity
12
  High pressure stability:
13
+ category: Molecular Dynamics
14
  task-page: stability
15
  task-layout: centered
16
  rank-page:
17
  Combustion:
18
+ category: Molecular Dynamics
19
  task-page: combustion
20
  task-layout: centered
21
  rank-page: combustion
pyproject.toml CHANGED
@@ -42,6 +42,7 @@ run = [
42
  "prefect-dask",
43
  "dask",
44
  "dask_jobqueue",
 
45
  ]
46
  app = [
47
  "streamlit==1.38.0",
 
42
  "prefect-dask",
43
  "dask",
44
  "dask_jobqueue",
45
+ "tables",
46
  ]
47
  app = [
48
  "streamlit==1.38.0",
tests/test_eos.py CHANGED
@@ -27,6 +27,7 @@ def single_eos_flow(calculator_name):
27
  ),
28
  max_abs_strain=0.1,
29
  npoints=6,
 
30
  )
31
 
32
 
 
27
  ),
28
  max_abs_strain=0.1,
29
  npoints=6,
30
+ concurrent=True
31
  )
32
 
33