Spaces:
Running
Running
File size: 5,573 Bytes
1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 1d1ee87 aadf5d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
"""
Generates a database of special quasi-random structures (SQS) from a template structure.
This script utilizes the `structuretoolkit <https://github.com/pyiron/structuretoolkit/tree/main>`_
to call `sqsgenerator <https://sqsgenerator.readthedocs.io/en/latest/index.html#>`_ to generate
SQS structures. The generated structures are saved to an ASE database file and optionally uploaded
to the Hugging Face Hub.

References
~~~~~~~~~~

- Alvi, S. M. A. A., Janssen, J., Khatamsaz, D., Perez, D., Allaire, D., & Arroyave, R. (2024).
  Hierarchical Gaussian Process-Based Bayesian Optimization for Materials Discovery in High
  Entropy Alloy Spaces. *arXiv preprint arXiv:2410.04314*.
- Gehringer, D., Friák, M., & Holec, D. (2023). Models of configurationally-complex alloys made
  simple. *Computer Physics Communications, 286*, 108664.

Authors
~~~~~~~

- Jan Janssen (`@jan-janssen <https://github.com/jan-janssen>`_)
- Yuan Chiang (`@chiang-yuan <https://github.com/chiang-yuan>`_)
"""
import os
from pathlib import Path
from typing import Generator, Iterable
import numpy as np
from huggingface_hub import HfApi, hf_hub_download
from prefect import task
from tqdm.auto import tqdm
from ase import Atoms
from ase.db import connect
def save_to_db(
    atoms_list: list[Atoms] | Iterable[Atoms] | Atoms,
    db_path: Path | str,
    upload: bool = True,
    hf_token: str | None = os.getenv("HF_TOKEN", None),
    repo_id: str = "atomind/mlip-arena",
    repo_type: str = "dataset",
    subfolder: str = Path(__file__).parent.name,
):
    """Save ASE Atoms objects to an ASE database and optionally upload to Hugging Face Hub.

    Args:
        atoms_list: A single ``Atoms`` object or an iterable of them.
        db_path: Location of the ASE database file to write to.
        upload: When true, push the database file to the Hugging Face Hub
            after it has been written.
        hf_token: Hugging Face access token. The signature default is read
            from the ``HF_TOKEN`` environment variable at import time; when
            the value is ``None`` the variable is read again at call time.
        repo_id: Repository on the Hub that receives the file.
        repo_type: Repository type forwarded to ``HfApi.upload_file``.
        subfolder: Folder inside the repository for the uploaded file.
            Defaults to the name of the directory containing this module.

    Returns:
        ``db_path`` as a ``Path``.

    Raises:
        ValueError: If ``upload`` is true and no token is available, or if
            ``atoms_list`` yields an object that is not an ``Atoms``.
    """
    # The default argument was evaluated when the module was imported, so a
    # token exported afterwards would be missed without this second lookup.
    if hf_token is None:
        hf_token = os.getenv("HF_TOKEN")

    # Fail before touching the database if the upload could not succeed.
    if upload and hf_token is None:
        raise ValueError("HF_TOKEN is required to upload the database.")

    db_path = Path(db_path)

    # Accept a bare Atoms object for convenience.
    if isinstance(atoms_list, Atoms):
        atoms_list = [atoms_list]

    with connect(db_path) as db:
        for atoms in atoms_list:
            if not isinstance(atoms, Atoms):
                raise ValueError("atoms_list must contain ASE Atoms objects.")
            db.write(atoms)

    if upload:
        api = HfApi(token=hf_token)
        api.upload_file(
            path_or_fileobj=db_path,
            path_in_repo=f"{subfolder}/{db_path.name}",
            repo_id=repo_id,
            repo_type=repo_type,
        )
        print(f"{db_path.name} uploaded to {repo_id}/{subfolder}")

    return db_path
@task
def get_atoms_from_db(
    db_path: Path | str,
    repo_id: str = "atomind/mlip-arena",
    repo_type: str = "dataset",
    subfolder: str = Path(__file__).parent.name,
) -> Generator[Atoms, None, None]:
    """Yield every row of an ASE database as an ``Atoms`` object.

    When ``db_path`` does not exist locally, the file is fetched with
    ``hf_hub_download`` from ``subfolder`` of ``repo_id`` and the path
    returned by that call is opened instead.
    """
    local_path = Path(db_path)

    if local_path.exists():
        source = local_path
    else:
        # Not available on disk: fall back to the copy on the Hub.
        source = hf_hub_download(
            repo_id=repo_id,
            repo_type=repo_type,
            subfolder=subfolder,
            filename=str(local_path),
        )

    with connect(source) as database:
        yield from (record.toatoms() for record in database.select())
def body_order(n=32, b=5):
    """
    Generate all possible combinations of atomic counts for `b` species
    that sum to `n`.

    Args:
        n: Total number of atoms to distribute.
        b: Number of species, i.e. the length of every returned combination.

    Returns:
        A list of `b`-element lists of non-negative integers that each sum to
        `n`, in ascending lexicographic order. Empty when `n` is negative.

    Raises:
        ValueError: If `b` is smaller than 1.
    """
    if b < 1:
        raise ValueError("b must be at least 1.")
    if b == 1:
        # A single species has to take all n atoms. Without this base case a
        # call with b == 1 would recurse past zero and never terminate.
        return [[n]] if n >= 0 else []
    return [
        [i] + rest
        for i in range(n + 1)
        for rest in body_order(n=n - i, b=b - 1)
    ]
def generate_sqs(structure_template, elements, counts):
    """
    Build one SQS cell from a template and per-element atom counts.

    Each count is divided by the number of atoms in ``structure_template`` to
    obtain a mole fraction. The resulting mapping is passed to
    ``structuretoolkit.build.sqs_structures`` and the first structure it
    returns is handed back.
    """
    import structuretoolkit as stk

    n_sites = len(structure_template)

    fractions = {}
    for symbol, amount in zip(elements, counts):
        fractions[symbol] = amount / n_sites

    candidates = stk.build.sqs_structures(
        structure=structure_template,
        mole_fractions=fractions,
    )
    return candidates[0]
def get_endmember(structure, conc_lst, elements):
    """
    Assign a single element to all atoms in the structure to create an endmember.

    Args:
        structure: Object whose ``symbols`` supports slice assignment. It is
            modified in place.
        conc_lst: Per-element counts, aligned with ``elements``. May be a
            list, tuple or NumPy array.
        elements: Element symbols, one per entry of ``conc_lst``.

    Returns:
        The same ``structure`` object, with every symbol set to the first
        element whose count is non-zero.

    Raises:
        IndexError: If every entry of ``conc_lst`` is zero.
    """
    # Coerce to an array first: comparing a plain list with 0 yields a single
    # bool instead of an element-wise mask, which would select the wrong data.
    occupied = np.asarray(conc_lst) != 0
    structure.symbols[:] = np.asarray(elements)[occupied][0]
    return structure
def generate_alloy_db(
    structure_template: Atoms,
    elements: list[str],
    db_path: Path | str | None = None,
    upload: bool = True,
    hf_token: str | None = os.getenv("HF_TOKEN", None),
    repo_id: str = "atomind/mlip-arena",
    repo_type: str = "dataset",
) -> Path:
    """Generate one structure per composition of ``elements`` and store them.

    Every way of distributing ``len(structure_template)`` atoms over
    ``elements`` is enumerated with ``body_order``. Compositions containing
    exactly one element are built with ``get_endmember`` on a copy of the
    template; all others are built with ``generate_sqs``. The resulting
    structures are written with ``save_to_db``.

    Args:
        structure_template: Template structure; its length fixes the number
            of atoms in every generated structure.
        elements: Element symbols to distribute over the template.
        db_path: Output database file. When ``None`` or empty, a file named
            ``sqs_<elements joined by '-'>.db`` next to this module is used.
            Any existing file at the chosen path is deleted first.
        upload: Forwarded to ``save_to_db``.
        hf_token: Hugging Face access token. The signature default is read
            from ``HF_TOKEN`` at import time; when the value is ``None`` the
            variable is read again at call time.
        repo_id: Forwarded to ``save_to_db``.
        repo_type: Forwarded to ``save_to_db``.

    Returns:
        The value returned by ``save_to_db``.

    Raises:
        ValueError: If ``upload`` is true and no token is available.
    """
    # The default argument was evaluated at import time; look again so a
    # token exported afterwards is still honoured.
    if hf_token is None:
        hf_token = os.getenv("HF_TOKEN")

    # Fail early, before the structure generation loop runs.
    if upload and hf_token is None:
        raise ValueError("HF_TOKEN is required to upload the database.")

    num_atoms = len(structure_template)
    num_species = len(elements)

    # Generate all possible atomic configurations
    configurations = np.array(body_order(n=num_atoms, b=num_species))

    # Prepare the database. A Path instance is always truthy, so the fallback
    # has to be decided on the raw argument rather than on Path(db_path).
    if db_path:
        db_path = Path(db_path)
    else:
        db_path = (
            Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
        )
    # Start from a clean file (presumably so stale rows are not kept).
    db_path.unlink(missing_ok=True)

    species = np.array(elements)
    atoms_list = []
    for composition in tqdm(configurations, total=len(configurations)):
        present = composition != 0
        if int(present.sum()) == 1:
            # Trivial case: only one element is present.
            atoms = get_endmember(
                structure=structure_template.copy(),
                conc_lst=composition,
                elements=elements,
            )
        else:
            atoms = generate_sqs(
                structure_template=structure_template,
                elements=species[present],
                counts=composition[present],
            )
        atoms_list.append(atoms)

    return save_to_db(
        atoms_list=atoms_list,
        db_path=db_path,
        upload=upload,
        hf_token=hf_token,
        repo_id=repo_id,
        repo_type=repo_type,
    )
|