Spaces:

atomind
/

mlip-arena

Running

Yuan (Cyrus) Chiang committed on Dec 18, 2024

Commit

1d1ee87

unverified ·

1 Parent(s): 08a88d8

High-throughput EOS flow on alloy systems (#30)

* refactor eos into task

* task name and cache

* avoid duplicate test on PR branches

* more returned data in eos

* return b1

* minor md test change

* add eos-alloy input file

* add symmetry

* support disabling concurrency

* optimize eos, md, opt tasks to handle exceptions; first successful HT eos-alloy flow

* change cache policy

* dictionary items

* add pytable dependency; add FIRE2 optimizer

* fixed hdf5 output hook

* rename task folder

* add example notebook

* increase method string length for hdf5 store

* use file path

* implement safe hdf store to avoid inconsistencies from multiple processes

* add references in notebook and flow

* frechet filter in eos flow; add ternary plot

Files changed (15) hide show

mlip_arena/data/__init__.py +0 -0
mlip_arena/data/local.py +25 -0
mlip_arena/models/__init__.py +1 -1
mlip_arena/models/registry.yaml +7 -3
mlip_arena/tasks/diatomics/run.ipynb +0 -0
mlip_arena/tasks/eos.py +71 -39
mlip_arena/tasks/eos_alloy/__init__.py +0 -0
mlip_arena/tasks/eos_alloy/flow.py +177 -0
mlip_arena/tasks/eos_alloy/input.py +132 -0
mlip_arena/tasks/eos_alloy/run.ipynb +0 -0
mlip_arena/tasks/md.py +4 -3
mlip_arena/tasks/optimize.py +11 -8
mlip_arena/tasks/registry.yaml +2 -2
pyproject.toml +1 -0
tests/test_eos.py +1 -0

mlip_arena/data/__init__.py ADDED Viewed

File without changes

mlip_arena/data/local.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+import time
+from pandas import HDFStore
+# https://stackoverflow.com/questions/22522551/pandas-hdf5-as-a-database/29014295#29014295
+class SafeHDFStore(HDFStore):
+    """
+    ``HDFStore`` that holds an exclusive ``<path>.lock`` file while it is open.
+
+    The constructor blocks until it manages to create the lock file, polling
+    every ``probe_interval`` seconds, so that only one process at a time has
+    the store open. The lock is released when the ``with`` block exits, so the
+    class must be used as a context manager; calling ``close()`` directly does
+    not remove the lock file.
+
+    Args:
+        *args: Positional arguments forwarded to ``HDFStore``. The store path
+            is taken from the first positional argument, or from the ``path``
+            keyword when no positional argument is given.
+        probe_interval: Seconds to sleep between attempts to acquire the lock
+            (default 1). Removed from ``kwargs`` before they are forwarded.
+        **kwargs: Remaining keyword arguments forwarded to ``HDFStore``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        probe_interval = kwargs.pop("probe_interval", 1)
+        path = args[0] if args else kwargs["path"]
+        self._lock = f"{path}.lock"
+        while True:
+            try:
+                # O_CREAT | O_EXCL makes the creation fail if the file already
+                # exists, which is what turns the file into a lock.
+                self._flock = os.open(self._lock, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+                break
+            except FileExistsError:
+                time.sleep(probe_interval)
+        try:
+            HDFStore.__init__(self, *args, **kwargs)
+        except BaseException:
+            # Without this, a failed open would leave the lock file behind and
+            # every later caller would wait in the loop above forever.
+            self._release_lock()
+            raise
+
+    def __exit__(self, *args, **kwargs):
+        try:
+            HDFStore.__exit__(self, *args, **kwargs)
+        finally:
+            # Release the lock even when closing the store raises.
+            self._release_lock()
+
+    def _release_lock(self):
+        """Close the lock file descriptor and delete the lock file."""
+        os.close(self._flock)
+        os.remove(self._lock)

mlip_arena/models/__init__.py CHANGED Viewed

@@ -22,7 +22,7 @@ for model, metadata in REGISTRY.items():
     try:
         module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
         MLIPMap[model] = getattr(module, metadata["class"])
-    except (ModuleNotFoundError, AttributeError) as e:
         print(e)
         continue

     try:
         module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
         MLIPMap[model] = getattr(module, metadata["class"])
+    except (ModuleNotFoundError, AttributeError, ValueError) as e:
         print(e)
         continue

mlip_arena/models/registry.yaml CHANGED Viewed

@@ -10,6 +10,7 @@ MACE-MP(M):
   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - stability
@@ -79,6 +80,8 @@ MatterSim:
   datasets:
     - MPTrj
     - Alexandria
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/microsoft/mattersim
@@ -101,7 +104,6 @@ ORB:
   datasets:
     - MPTrj # TODO: fake HF dataset repo
     - Alexandria
-  cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - combustion
@@ -125,7 +127,6 @@ SevenNet:
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets:
     - MPTrj # TODO: fake HF dataset repo
-  cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - stability
@@ -151,6 +152,8 @@ eqV2(OMat):
     - OMat
     - MPTrj
     - Alexandria
   gpu-tasks:
     - homonuclear-diatomics
   prediction: EFS
@@ -238,7 +241,6 @@ MACE-OFF(M):
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets:
     - SPICE # TODO: fake HF dataset repo
-  cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/ACEsuit/mace
@@ -302,6 +304,8 @@ ORBv2:
   datasets:
     - MPTrj
     - Alexandria
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/orbital-materials/orb-models

   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
+    - eos_alloy
   gpu-tasks:
     - homonuclear-diatomics
     - stability
   datasets:
     - MPTrj
     - Alexandria
+  cpu-tasks:
+    - eos_alloy
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/microsoft/mattersim
   datasets:
     - MPTrj # TODO: fake HF dataset repo
     - Alexandria
   gpu-tasks:
     - homonuclear-diatomics
     - combustion
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets:
     - MPTrj # TODO: fake HF dataset repo
   gpu-tasks:
     - homonuclear-diatomics
     - stability
     - OMat
     - MPTrj
     - Alexandria
+  cpu-tasks:
+    - eos_alloy
   gpu-tasks:
     - homonuclear-diatomics
   prediction: EFS
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets:
     - SPICE # TODO: fake HF dataset repo
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/ACEsuit/mace
   datasets:
     - MPTrj
     - Alexandria
+  cpu-tasks:
+    - eos_alloy
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/orbital-materials/orb-models

mlip_arena/tasks/diatomics/run.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

mlip_arena/tasks/eos.py CHANGED Viewed

@@ -6,13 +6,14 @@ https://github.com/materialsvirtuallab/matcalc/blob/main/matcalc/eos.py
 from __future__ import annotations
-from typing import TYPE_CHECKING
 import numpy as np
 from prefect import task
 from prefect.futures import wait
 from prefect.runtime import task_run
-from prefect.tasks import task_input_hash
 from ase import Atoms
 from ase.filters import *  # type: ignore
@@ -39,21 +40,23 @@ def _generate_task_run_name():
 @task(
     name="EOS",
     task_run_name=_generate_task_run_name,
-    cache_key_fn=task_input_hash,
 )
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
-    calculator_kwargs: dict | None,
     device: str | None = None,
     optimizer: Optimizer | str = "BFGSLineSearch",  # type: ignore
     optimizer_kwargs: dict | None = None,
-    filter: Filter | str | None = None,
     filter_kwargs: dict | None = None,
     criterion: dict | None = None,
     max_abs_strain: float = 0.1,
     npoints: int = 11,
-):
     """
     Compute the equation of state (EOS) for the given atoms and calculator.
@@ -69,11 +72,12 @@ def run(
         criterion: The criterion to use.
         max_abs_strain: The maximum absolute strain to use.
         npoints: The number of points to sample.
     Returns:
-        A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy.
     """
-    first_relax = OPT(
         atoms=atoms,
         calculator_name=calculator_name,
         calculator_kwargs=calculator_kwargs,
@@ -83,8 +87,14 @@ def run(
         filter=filter,
         filter_kwargs=filter_kwargs,
         criterion=criterion,
     )
     relaxed = first_relax["atoms"]
     # p0 = relaxed.get_positions()
@@ -92,37 +102,57 @@ def run(
     factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
-    futures = []
-    for f in factors:
-        atoms = relaxed.copy()
-        atoms.set_cell(c0 * f, scale_atoms=True)
-        future = OPT.submit(
-            atoms=atoms,
-            calculator_name=calculator_name,
-            calculator_kwargs=calculator_kwargs,
-            device=device,
-            optimizer=optimizer,
-            optimizer_kwargs=optimizer_kwargs,
-            filter=None,
-            filter_kwargs=None,
-            criterion=criterion,
-        )
-        futures.append(future)
-    wait(futures)
-    volumes = [
-        f.result()["atoms"].get_volume()
-        for f in futures
-        if isinstance(f.result(), dict)
-    ]
-    energies = [
-        f.result()["atoms"].get_potential_energy()
-        for f in futures
-        if isinstance(f.result(), dict)
-    ]
     volumes, energies = map(
         list,
@@ -136,6 +166,8 @@ def run(
     bm.fit()
     return {
         "eos": {"volumes": volumes, "energies": energies},
         "K": bm.b0_GPa,
         "b0": bm.b0,

 from __future__ import annotations
+from typing import TYPE_CHECKING, Any
 import numpy as np
 from prefect import task
+from prefect.cache_policies import INPUTS, TASK_SOURCE
 from prefect.futures import wait
 from prefect.runtime import task_run
+from prefect.states import State
 from ase import Atoms
 from ase.filters import *  # type: ignore
 @task(
     name="EOS",
     task_run_name=_generate_task_run_name,
+    cache_policy=TASK_SOURCE + INPUTS
+    # cache_key_fn=task_input_hash,
 )
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
+    calculator_kwargs: dict | None = None,
     device: str | None = None,
     optimizer: Optimizer | str = "BFGSLineSearch",  # type: ignore
     optimizer_kwargs: dict | None = None,
+    filter: Filter | str | None = "FrechetCell",  # type: ignore
     filter_kwargs: dict | None = None,
     criterion: dict | None = None,
     max_abs_strain: float = 0.1,
     npoints: int = 11,
+    concurrent: bool = True,
+) -> dict[str, Any] | State:
     """
     Compute the equation of state (EOS) for the given atoms and calculator.
         criterion: The criterion to use.
         max_abs_strain: The maximum absolute strain to use.
         npoints: The number of points to sample.
+        concurrent: Whether to relax multiple structures concurrently.
     Returns:
+        A dictionary containing the EOS data, bulk modulus, equilibrium volume, and equilibrium energy if successful. Otherwise, a prefect state object.
     """
+    state = OPT(
         atoms=atoms,
         calculator_name=calculator_name,
         calculator_kwargs=calculator_kwargs,
         filter=filter,
         filter_kwargs=filter_kwargs,
         criterion=criterion,
+        return_state=True,
     )
+    if state.is_failed():
+        return state
+    first_relax = state.result(raise_on_failure=False)
+    assert isinstance(first_relax, dict)
     relaxed = first_relax["atoms"]
     # p0 = relaxed.get_positions()
     factors = np.linspace(1 - max_abs_strain, 1 + max_abs_strain, npoints) ** (1 / 3)
+    if concurrent:
+        futures = []
+        for f in factors:
+            atoms = relaxed.copy()
+            atoms.set_cell(c0 * f, scale_atoms=True)
+            future = OPT.submit(
+                atoms=atoms,
+                calculator_name=calculator_name,
+                calculator_kwargs=calculator_kwargs,
+                device=device,
+                optimizer=optimizer,
+                optimizer_kwargs=optimizer_kwargs,
+                filter=None,
+                filter_kwargs=None,
+                criterion=criterion,
+            )
+            futures.append(future)
+        wait(futures)
+        results = [
+            f.result(raise_on_failure=False)
+            for f in futures
+            if future.state.is_completed()
+        ]
+    else:
+        states = []
+        for f in factors:
+            atoms = relaxed.copy()
+            atoms.set_cell(c0 * f, scale_atoms=True)
+            state = OPT(
+                atoms=atoms,
+                calculator_name=calculator_name,
+                calculator_kwargs=calculator_kwargs,
+                device=device,
+                optimizer=optimizer,
+                optimizer_kwargs=optimizer_kwargs,
+                filter=None,
+                filter_kwargs=None,
+                criterion=criterion,
+                return_state=True,
+            )
+            states.append(state)
+        results = [
+            s.result(raise_on_failure=False) for s in states if state.is_completed()
+        ]
+    volumes = [f["atoms"].get_volume() for f in results]
+    energies = [f["atoms"].get_potential_energy() for f in results]
     volumes, energies = map(
         list,
     bm.fit()
     return {
+        "atoms": relaxed,
+        "calculator_name": calculator_name,
         "eos": {"volumes": volumes, "energies": energies},
         "K": bm.b0_GPa,
         "b0": bm.b0,

mlip_arena/tasks/eos_alloy/__init__.py ADDED Viewed

File without changes

mlip_arena/tasks/eos_alloy/flow.py ADDED Viewed

	@@ -0,0 +1,177 @@

+from functools import partial
+from pathlib import Path
+import json
+import pandas as pd
+from dask.distributed import Client
+from dask_jobqueue import SLURMCluster
+from huggingface_hub import hf_hub_download
+from prefect import Task, flow, task
+from prefect.client.schemas.objects import TaskRun
+from prefect.futures import wait
+from prefect.states import State
+from prefect_dask import DaskTaskRunner
+from ase.db import connect
+from mlip_arena.data.local import SafeHDFStore
+from mlip_arena.models import REGISTRY, MLIPEnum
+from mlip_arena.tasks.eos import run as EOS
+@task
+def get_atoms_from_db(db_path: Path | str):
+    """
+    Yield every row of an ASE database as an ``Atoms`` object.
+
+    Args:
+        db_path: Path to the database file. When the path does not exist
+            locally, the file is downloaded from the ``atomind/mlip-arena``
+            dataset repository on the Hugging Face Hub, from the subfolder
+            named after this module's parent directory.
+
+    Yields:
+        The result of ``row.toatoms()`` for each row selected from the database.
+    """
+    source = Path(db_path)
+    if not source.exists():
+        # Fall back to the copy hosted on the Hugging Face Hub.
+        source = hf_hub_download(
+            repo_id="atomind/mlip-arena",
+            repo_type="dataset",
+            subfolder=f"{Path(__file__).parent.name}",
+            filename=str(source),
+        )
+    with connect(source) as db:
+        yield from (row.toatoms() for row in db.select())
+def save_to_hdf(
+    tsk: Task, run: TaskRun, state: State, fpath: Path | str, table_name: str
+):
+    """
+    Define a hook on completion of EOS task to save results to HDF5 file.
+
+    Nothing is written unless ``run.state`` is completed. Otherwise the EOS
+    result is flattened into a DataFrame with one row per (volume, energy)
+    point; the scalar fields (method, formula, run time, fit parameters) are
+    repeated on every row. The table is written twice:
+
+    * as JSON to ``<this module's directory>/<model family>/<method>_<formula>.json``
+    * appended to ``table_name`` in an HDF5 file whose name is ``fpath`` with
+      ``_<method>`` added to the stem.
+
+    Args:
+        tsk: The task the hook is attached to (unused).
+        run: The task run; provides the state, inputs and total run time.
+        state: The state passed by the hook machinery (unused; ``run.state``
+            is read instead).
+        fpath: Base path of the HDF5 output file.
+        table_name: Key of the table inside the HDF5 store.
+    """
+    if not run.state.is_completed():
+        return
+
+    result = run.state.result(raise_on_failure=False)
+
+    atoms = result["atoms"]
+    calculator_name = (
+        run.task_inputs["calculator_name"] or result["calculator_name"]
+    )
+
+    energies = [float(e) for e in result["eos"]["energies"]]
+    formula = atoms.get_chemical_formula()
+
+    df = pd.DataFrame(
+        {
+            "method": calculator_name,
+            "formula": formula,
+            "total_run_time": run.total_run_time,
+            "v0": result["v0"],
+            "e0": result["e0"],
+            "b0": result["b0"],
+            "b1": result["b1"],
+            "volume": result["eos"]["volumes"],
+            "energy": energies,
+        }
+    )
+
+    fpath = Path(fpath)
+    fpath = fpath.with_stem(fpath.stem + f"_{calculator_name}")
+
+    # ``Path(__file__)`` is the module file itself; the per-family folder has
+    # to be created next to it, i.e. under its parent directory.
+    family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
+    family_path.mkdir(parents=True, exist_ok=True)
+
+    # The raw result holds an ``Atoms`` object, which ``json.dump`` cannot
+    # serialize, so the flattened table is exported instead.
+    df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)
+
+    # SafeHDFStore serializes access between processes writing the same file.
+    with SafeHDFStore(fpath, mode="a") as store:
+        store.append(
+            table_name,
+            df,
+            format="table",
+            data_columns=True,
+            # Reserve enough width for the longest formula / method strings.
+            min_itemsize={"formula": 50, "method": 20},
+        )
+@flow
+def run_from_db(
+    db_path: Path | str,
+    out_path: Path | str,
+    table_name: str,
+    optimizer="FIRE",
+    optimizer_kwargs=None,
+    filter="FrechetCell",
+    filter_kwargs=None,
+    criterion=dict(fmax=0.1, steps=1000),
+    max_abs_strain=0.20,
+    concurrent=False,
+):
+    """
+    Submit one EOS task per (structure, model) pair and collect the results.
+
+    Structures come from ``get_atoms_from_db(db_path)``. A model is used only
+    when its registry entry has a truthy ``npt`` value and lists this task
+    (the name of this module's parent directory) under ``cpu-tasks`` or
+    ``gpu-tasks``. Each EOS task gets an ``on_completion`` hook that calls
+    ``save_to_hdf`` with ``out_path`` and ``table_name``.
+
+    Args:
+        db_path: Database of input structures, passed to ``get_atoms_from_db``.
+        out_path: Output path forwarded to ``save_to_hdf`` as ``fpath``.
+        table_name: Table name forwarded to ``save_to_hdf``.
+        optimizer: Forwarded to the EOS task.
+        optimizer_kwargs: Forwarded to the EOS task.
+        filter: Forwarded to the EOS task.
+        filter_kwargs: Forwarded to the EOS task.
+        criterion: Forwarded to the EOS task.
+        max_abs_strain: Forwarded to the EOS task.
+        concurrent: Forwarded to the EOS task.
+
+    Returns:
+        The results of the submitted EOS tasks whose state is completed.
+    """
+    EOS_ = EOS.with_options(
+        on_completion=[partial(save_to_hdf, fpath=out_path, table_name=table_name)]
+    )
+    task_name = Path(__file__).parent.name
+
+    futures = []
+    for atoms in get_atoms_from_db(db_path):
+        for mlip in MLIPEnum:
+            metadata = REGISTRY[mlip.name]
+            if not metadata["npt"]:
+                continue
+            # A key that is present but empty in the YAML registry loads as
+            # None, so ``.get(key, [])`` alone is not enough to get a list.
+            enabled_tasks = (metadata.get("cpu-tasks") or []) + (
+                metadata.get("gpu-tasks") or []
+            )
+            if task_name not in enabled_tasks:
+                continue
+
+            future = EOS_.submit(
+                atoms=atoms,
+                calculator_name=mlip.name,
+                calculator_kwargs={},
+                optimizer=optimizer,
+                optimizer_kwargs=optimizer_kwargs,
+                filter=filter,
+                filter_kwargs=filter_kwargs,
+                criterion=criterion,
+                max_abs_strain=max_abs_strain,
+                concurrent=concurrent,
+            )
+            futures.append(future)
+
+    wait(futures)
+
+    # Failed runs are dropped rather than raised.
+    return [
+        f.result(timeout=None, raise_on_failure=False)
+        for f in futures
+        if f.state.is_completed()
+    ]
+if __name__ == "__main__":
+    # Resources requested per SLURM allocation.
+    nodes_per_alloc = 1
+    gpus_per_alloc = 4
+    ntasks = 1
+
+    # Directives that replace the ones dask_jobqueue is told to skip below.
+    extra_directives = [
+        "-J eos",
+        "-q debug",
+        f"-N {nodes_per_alloc}",
+        "-C gpu",
+        f"-G {gpus_per_alloc}",
+    ]
+
+    cluster_kwargs = {
+        "cores": 1,
+        "memory": "64 GB",
+        "shebang": "#!/bin/bash",
+        "account": "m3828",
+        "walltime": "00:30:00",
+        "job_mem": "0",
+        "job_script_prologue": [
+            "source ~/.bashrc",
+            "module load python",
+            "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
+        ],
+        "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
+        "job_extra_directives": extra_directives,
+    }
+
+    cluster = SLURMCluster(**cluster_kwargs)
+    # Show the generated batch script before any job is requested.
+    print(cluster.job_script())
+    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
+    client = Client(cluster)
+
+    # Run the flow's tasks on the Dask cluster created above.
+    run_from_db_ = run_from_db.with_options(
+        task_runner=DaskTaskRunner(address=client.scheduler.address),
+        log_prints=True,
+    )
+
+    results = run_from_db_(
+        db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
+    )

mlip_arena/tasks/eos_alloy/input.py ADDED Viewed

	@@ -0,0 +1,132 @@

+"""
+Generates a database of special quasi-random structures (SQS) from a template structure.
+This script utilizes the `structuretoolkit <https://github.com/pyiron/structuretoolkit/tree/main>`_
+to call `sqsgenerator <https://sqsgenerator.readthedocs.io/en/latest/index.html#>`_ to generate
+SQS structures. The generated structures are saved to an ASE database file and optionally uploaded
+to the Hugging Face Hub.
+References
+~~~~~~~~~~
+- Alvi, S. M. A. A., Janssen, J., Khatamsaz, D., Perez, D., Allaire, D., & Arroyave, R. (2024).
+  Hierarchical Gaussian Process-Based Bayesian Optimization for Materials Discovery in High
+  Entropy Alloy Spaces. *arXiv preprint arXiv:2410.04314*.
+- Gehringer, D., Friák, M., & Holec, D. (2023). Models of configurationally-complex alloys made
+  simple. *Computer Physics Communications, 286*, 108664.
+Authors
+~~~~~~~
+- Jan Janssen (`@jan-janssen <https://github.com/jan-janssen>`_)
+- Yuan Chiang (`@chiang-yuan <https://github.com/chiang-yuan>`_)
+"""
+import os
+from pathlib import Path
+import numpy as np
+from dotenv import load_dotenv
+from huggingface_hub import HfApi
+from tqdm.auto import tqdm
+from ase import Atoms
+from ase.build import bulk
+from ase.db import connect
+def body_order(n=32, b=5):
+    """
+    Generate all possible combinations of atomic counts for `b` species
+    that sum to `n`.
+
+    Args:
+        n: Total number of atoms to distribute.
+        b: Number of species; must be at least 1.
+
+    Returns:
+        A list of lists of length `b`. Each inner list holds non-negative
+        integer counts that sum to `n`, ordered with the first count
+        increasing slowest.
+
+    Raises:
+        ValueError: If `b` is smaller than 1.
+    """
+    if b < 1:
+        raise ValueError("b must be a positive integer")
+    if b == 1:
+        # A single species takes all atoms; this also ends the recursion.
+        return [[n]]
+    return [
+        [i] + rest for i in range(n + 1) for rest in body_order(n=n - i, b=b - 1)
+    ]
+def generate_sqs(structure_template, elements, counts):
+    """
+    Generate a special quasi-random structure (SQS) based on mole fractions.
+
+    Args:
+        structure_template: Structure whose length is the total number of sites.
+        elements: Species to place on the sites.
+        counts: Number of sites per species, paired with `elements` by position.
+
+    Returns:
+        The first structure returned by ``structuretoolkit.build.sqs_structures``.
+    """
+    # Imported here so the module can be loaded without structuretoolkit.
+    import structuretoolkit as stk
+
+    mole_fractions = {}
+    for species, count in zip(elements, counts):
+        # Convert the site count into a fraction of all sites.
+        mole_fractions[species] = count / len(structure_template)
+
+    candidates = stk.build.sqs_structures(
+        structure=structure_template,
+        mole_fractions=mole_fractions,
+    )
+    return candidates[0]
+def get_endmember(structure, conc_lst, elements):
+    """
+    Assign a single element to all atoms in the structure to create an endmember.
+
+    The element chosen is the first entry of `elements` whose count in
+    `conc_lst` is non-zero. `structure` is modified in place and returned.
+    """
+    present = np.array(elements)[conc_lst != 0]
+    structure.symbols[:] = present[0]
+    return structure
+def generate_alloy_db(
+    structure_template: Atoms,
+    elements: list[str],
+    local_path: Path | None = None,
+    upload: bool = True,
+    repo_id: str = "atomind/mlip-arena",
+) -> Path:
+    """
+    Build an ASE database with one structure per composition of `elements`.
+
+    Every way of distributing the sites of `structure_template` among the
+    species is enumerated with ``body_order``. Compositions containing a
+    single species are written as endmembers; all others are generated as SQS
+    structures.
+
+    Args:
+        structure_template: Structure providing the lattice and site count.
+        elements: Species to distribute over the sites.
+        local_path: Where to write the database. Defaults to
+            ``sqs_<elements joined by '-'>.db`` next to this file. An existing
+            file at that path is deleted first.
+        upload: Whether to upload the database to the Hugging Face Hub.
+        repo_id: Dataset repository that receives the upload.
+
+    Returns:
+        The path of the database file.
+
+    Raises:
+        ValueError: If `upload` is true and ``HF_TOKEN`` is not set.
+    """
+    # Fail early when an upload is requested without credentials.
+    load_dotenv()
+    hf_token = os.getenv("HF_TOKEN", None)
+    if upload and hf_token is None:
+        raise ValueError("HF_TOKEN environment variable not set.")
+
+    # One row per composition, one column per species.
+    configurations = np.array(
+        body_order(n=len(structure_template), b=len(elements))
+    )
+
+    # Always start from an empty database file.
+    if local_path:
+        db_path = local_path
+    else:
+        db_path = Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
+    db_path.unlink(missing_ok=True)
+
+    with connect(db_path) as db:
+        for composition in tqdm(configurations, total=len(configurations)):
+            n_absent = sum(composition == 0)
+            if n_absent == len(elements) - 1:
+                # Only one species present: fill a copy of the template with it.
+                atoms = get_endmember(
+                    structure=structure_template.copy(),
+                    conc_lst=composition,
+                    elements=elements,
+                )
+            else:
+                # Two or more species: build an SQS from the non-zero counts.
+                occupied = composition != 0
+                atoms = generate_sqs(
+                    structure_template=structure_template,
+                    elements=np.array(elements)[occupied],
+                    counts=composition[occupied],
+                )
+            db.write(atoms)
+
+    if upload:
+        # Stored in the repository under a folder named after this module's directory.
+        api = HfApi(token=hf_token)
+        api.upload_file(
+            path_or_fileobj=db_path,
+            path_in_repo=f"{Path(__file__).parent.name}/{db_path.name}",
+            repo_id=repo_id,
+            repo_type="dataset",
+        )
+        print(f"Database uploaded: {db_path}")
+
+    return db_path
+if __name__ == "__main__":
+    # 2x2x2 repeat of a cubic cell built with a = 3.6, used as the SQS template.
+    unit_cell = bulk("Al", a=3.6, cubic=True)
+    structure_template = unit_cell.repeat([2, 2, 2])
+    elements = ["Fe", "Ni", "Cr"]
+    generate_alloy_db(structure_template, elements, upload=True)

mlip_arena/tasks/eos_alloy/run.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

mlip_arena/tasks/md.py CHANGED Viewed

@@ -55,14 +55,14 @@ works thereof, in binary and source code form.
 from __future__ import annotations
 from collections.abc import Sequence
-from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Literal
 import numpy as np
 from prefect import task
 from prefect.runtime import task_run
-from prefect.tasks import task_input_hash
 from scipy.interpolate import interp1d
 from scipy.linalg import schur
 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
@@ -200,7 +200,8 @@ def _generate_task_run_name():
 @task(
     name="MD",
     task_run_name=_generate_task_run_name,
-    cache_key_fn=task_input_hash,
     # cache_expiration=timedelta(days=1)
 )
 def run(

 from __future__ import annotations
 from collections.abc import Sequence
+from datetime import datetime
 from pathlib import Path
 from typing import Literal
 import numpy as np
 from prefect import task
+from prefect.cache_policies import INPUTS, TASK_SOURCE
 from prefect.runtime import task_run
 from scipy.interpolate import interp1d
 from scipy.linalg import schur
 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 @task(
     name="MD",
     task_run_name=_generate_task_run_name,
+    cache_policy=TASK_SOURCE + INPUTS
+    # cache_key_fn=task_input_hash,
     # cache_expiration=timedelta(days=1)
 )
 def run(

mlip_arena/tasks/optimize.py CHANGED Viewed

@@ -4,16 +4,15 @@ Define structure optimization tasks.
 from __future__ import annotations
-from datetime import timedelta
 from prefect import task
 from prefect.runtime import task_run
-from prefect.tasks import task_input_hash
 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 from ase import Atoms
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
 from ase.filters import *  # type: ignore
 from ase.filters import Filter
 from ase.optimize import *  # type: ignore
@@ -32,6 +31,7 @@ _valid_filters: dict[str, Filter] = {
 _valid_optimizers: dict[str, Optimizer] = {
     "MDMin": MDMin,
     "FIRE": FIRE,
     "LBFGS": LBFGS,
     "LBFGSLineSearch": LBFGSLineSearch,
     "BFGS": BFGS,
@@ -54,15 +54,16 @@ def _generate_task_run_name():
 @task(
-    name="MD",
     task_run_name=_generate_task_run_name,
-    cache_key_fn=task_input_hash,
     # cache_expiration=timedelta(days=1)
 )
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
-    calculator_kwargs: dict | None,
     dispersion: str | None = None,
     dispersion_kwargs: dict | None = None,
     device: str | None = None,
@@ -71,7 +72,7 @@ def run(
     filter: Filter | str | None = None,
     filter_kwargs: dict | None = None,
     criterion: dict | None = None,
-    # TODO: fix symmetry
 ):
     device = device or str(get_freer_device())
@@ -119,6 +120,9 @@ def run(
     optimizer_kwargs = optimizer_kwargs or {}
     criterion = criterion or {}
     if isinstance(filter, type) and issubclass(filter, Filter):
         filter_instance = filter(atoms, **filter_kwargs)
         print(f"Using filter: {filter_instance}")
@@ -131,7 +135,6 @@ def run(
     elif filter is None:
         optimizer_instance = optimizer(atoms, **optimizer_kwargs)
         print(f"Using optimizer: {optimizer_instance}")
         optimizer_instance.run(**criterion)
     return {

 from __future__ import annotations
 from prefect import task
+from prefect.cache_policies import INPUTS, TASK_SOURCE
 from prefect.runtime import task_run
 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 from ase import Atoms
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
+from ase.constraints import FixSymmetry
 from ase.filters import *  # type: ignore
 from ase.filters import Filter
 from ase.optimize import *  # type: ignore
 _valid_optimizers: dict[str, Optimizer] = {
     "MDMin": MDMin,
     "FIRE": FIRE,
+    "FIRE2": FIRE2,
     "LBFGS": LBFGS,
     "LBFGSLineSearch": LBFGSLineSearch,
     "BFGS": BFGS,
 @task(
+    name="OPT",
     task_run_name=_generate_task_run_name,
+    cache_policy=TASK_SOURCE + INPUTS
+    # cache_key_fn=task_input_hash,
     # cache_expiration=timedelta(days=1)
 )
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
+    calculator_kwargs: dict | None = None,
     dispersion: str | None = None,
     dispersion_kwargs: dict | None = None,
     device: str | None = None,
     filter: Filter | str | None = None,
     filter_kwargs: dict | None = None,
     criterion: dict | None = None,
+    symmetry: bool = False,
 ):
     device = device or str(get_freer_device())
     optimizer_kwargs = optimizer_kwargs or {}
     criterion = criterion or {}
+    if symmetry:
+        atoms.set_constraint(FixSymmetry(atoms))
     if isinstance(filter, type) and issubclass(filter, Filter):
         filter_instance = filter(atoms, **filter_kwargs)
         print(f"Using filter: {filter_instance}")
     elif filter is None:
         optimizer_instance = optimizer(atoms, **optimizer_kwargs)
         print(f"Using optimizer: {optimizer_instance}")
         optimizer_instance.run(**criterion)
     return {

mlip_arena/tasks/registry.yaml CHANGED Viewed

@@ -10,12 +10,12 @@ Thermal conductivity:
   task-layout: centered
   rank-page: thermal-conductivity
 High pressure stability:
-  category: Molecular dynamics
   task-page: stability
   task-layout: centered
   rank-page:
 Combustion:
-  category: Molecular dynamics
   task-page: combustion
   task-layout: centered
   rank-page: combustion

   task-layout: centered
   rank-page: thermal-conductivity
 High pressure stability:
+  category: Molecular Dynamics
   task-page: stability
   task-layout: centered
   rank-page:
 Combustion:
+  category: Molecular Dynamics
   task-page: combustion
   task-layout: centered
   rank-page: combustion

pyproject.toml CHANGED Viewed

@@ -42,6 +42,7 @@ run = [
     "prefect-dask",
     "dask",
     "dask_jobqueue",
 ]
 app = [
     "streamlit==1.38.0",

     "prefect-dask",
     "dask",
     "dask_jobqueue",
+    "tables",
 ]
 app = [
     "streamlit==1.38.0",

tests/test_eos.py CHANGED Viewed

@@ -27,6 +27,7 @@ def single_eos_flow(calculator_name):
         ),
         max_abs_strain=0.1,
         npoints=6,
     )

         ),
         max_abs_strain=0.1,
         npoints=6,
+        concurrent=True
     )