Yuan (Cyrus) Chiang and Christine Zhang committed
Commit 9b93949 · unverified · 1 Parent(s): fdf446a

Add energy conservation benchmark (#64)


* add entropy task script

* soft import, use logger, prefect future

---------

Co-authored-by: Christine Zhang <[email protected]>

benchmarks/energy_conservation/run.py ADDED
@@ -0,0 +1,214 @@
+"""
+Task for running MD simulations and computing the differential entropy
+of the simulated structures with respect to a reference dataset.
+
+See https://github.com/dskoda/quests for differential entropy details.
+"""
+
+from __future__ import annotations
+
+import os
+from datetime import datetime
+
+import numpy as np
+from ase.io import read
+from prefect import task
+from prefect.cache_policies import INPUTS, TASK_SOURCE
+from prefect.runtime import task_run
+
+from mlip_arena.models import MLIPEnum
+from mlip_arena.tasks.md import run as MD
+from mlip_arena.tasks.utils import logger
+
+try:
+    from quests.descriptor import get_descriptors
+    from quests.entropy import delta_entropy
+except ImportError as e:
+    logger.warning(e)
+    logger.warning(
+        "quests is not installed. Please install it using `pip install quests` or following the instructions at https://github.com/dskoda/quests to use this module."
+    )
+
+
+def get_entropy_from_path(
+    subset_path, dataset_path, dataset_desc_out_path, k=32, cutoff=5.0, h=0.015
+):
+    """
+    Computes the differential entropy of a subset of structures with respect
+    to a reference dataset.
+
+    Arguments:
+        subset_path (str): Path to the file containing the subset of structures.
+        dataset_path (str): Path to the file containing the full dataset of structures without the subset.
+        dataset_desc_out_path (str): Path to save the descriptors of the full dataset.
+        k (int, optional): Number of nearest neighbors used for descriptor calculation. Default is 32.
+        cutoff (float, optional): Cutoff distance for descriptor calculation. Default is 5.0.
+        h (float, optional): Bandwidth for the Gaussian kernel. Default is 0.015.
+
+    Returns:
+        np.ndarray: The differential entropy of the subset with respect to the dataset.
+    """
+
+    x_structures = read(dataset_path, index=":")
+    x_desc = get_descriptors(x_structures, k=k, cutoff=cutoff)
+    np.save(dataset_desc_out_path, x_desc)
+
+    y_structures = read(subset_path, index=":")
+    y_desc = get_descriptors(y_structures, k=k, cutoff=cutoff)
+
+    dH = delta_entropy(y_desc, x_desc, h=h)
+    return dH
+
+
+def get_trajectory_entropy(
+    trajectory_dir,
+    start_idx,
+    end_idx,
+    step,
+    dataset_desc_path,
+    k=32,
+    cutoff=5.0,
+    h=0.015,
+):
+    """
+    Computes the differential entropy of a subset of structures in a trajectory with respect
+    to a reference dataset.
+
+    Arguments:
+        trajectory_dir (str): Path to the directory containing the trajectory files.
+        start_idx (int): Starting index of the subset of structures to select from each trajectory.
+        end_idx (int): Ending index of the subset of structures to select from each trajectory.
+        step (int): Step size of the subset of structures to select from each trajectory.
+        dataset_desc_path (str): Path to the file containing the descriptors of the full dataset of structures without the subset.
+        k (int, optional): Number of nearest neighbors used for descriptor calculation. Default is 32.
+        cutoff (float, optional): Cutoff distance for descriptor calculation. Default is 5.0.
+        h (float, optional): Bandwidth for the Gaussian kernel. Default is 0.015.
+
+    Choose start_idx, end_idx, and step to match the sliding window used downstream.
+    For example, a window of size 5 with stride 2 means selecting every other structure
+    from index 2 (the middle of the first window) to index -2 (the middle of the last window).
+
+    Returns:
+        np.ndarray: The differential entropy of the subset of structures in the trajectory with respect to the dataset.
+    """
+    structures = []
+    for traj_file in sorted(os.listdir(trajectory_dir)):
+        traj = read(os.path.join(trajectory_dir, traj_file), index=":")
+        every_other = traj[start_idx:end_idx:step]
+        structures.extend(every_other)
+
+    desc = get_descriptors(structures, k=k, cutoff=cutoff)
+    x_desc = np.load(dataset_desc_path)
+    dH = delta_entropy(desc, x_desc, h=h)
+    return dH
+
+
+def run_simulations(model_names, structures, out_dir):
+    """
+    Runs simulations on a list of structures.
+
+    Parameters:
+        model_names (list[str]): List of models to use.
+        structures (list[ase.Atoms]): List of structures to simulate.
+        out_dir (str): Directory to save the simulation trajectories to.
+
+    Notes:
+        Structures are replicated to contain at least 100 atoms; structures exceeding 500 atoms after replication are skipped.
+        Structures are simulated with NVE MD at 1000 K for 5 ps.
+        Simulation trajectories are saved to files in out_dir, with each file named according to the index of the structure in the list.
+    """
+    min_atoms = 100
+    max_atoms = 500
+
+    futures = []
+
+    for model_name in model_names:
+        os.makedirs(out_dir, exist_ok=True)
+        model = MLIPEnum[model_name]
+        calc = model.value()
+
+        for i, atoms in enumerate(structures):
+            logger.info(
+                f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Running {model_name} on structure number {i}"
+            )
+
+            # Replicate the structure
+            n_atoms = len(atoms)
+            rep_factor = int(
+                np.ceil((min_atoms / n_atoms) ** (1 / 3))
+            )  # cube root since it's a 3D replication
+            supercell_atoms = atoms.repeat((rep_factor, rep_factor, rep_factor))
+            if len(supercell_atoms) > max_atoms:
+                logger.info(
+                    f"Skipping structure {i} because it has too many atoms ({len(supercell_atoms)} > {max_atoms})"
+                )
+                continue  # skip if it becomes too large
+
+            # Run NVE MD @ 1000 K for 5 ps
+            future = MD.submit(
+                supercell_atoms,
+                calculator=calc,
+                ensemble="nve",
+                dynamics="velocityverlet",
+                time_step=1.0,  # fs
+                total_time=5000,  # 5 ps = 5000 fs
+                temperature=1000.0,
+                traj_file=f"{out_dir}/{i}.traj",
+                traj_interval=100,
+                zero_linear_momentum=True,
+                zero_angular_momentum=True,
+            )
+            futures.append(future)
+
+    return [f.result(raise_on_failure=False) for f in futures]
+
+
+def _generate_task_run_name():
+    task_name = task_run.task_name
+    parameters = task_run.parameters
+
+    trajectory_dir = parameters["trajectory_dir"]
+    dataset_desc_path = parameters["dataset_desc_path"]
+
+    return f"{task_name}: {trajectory_dir} - {dataset_desc_path}"
+
+
+@task(
+    name="Entropy along trajectory",
+    task_run_name=_generate_task_run_name,
+    cache_policy=TASK_SOURCE + INPUTS,
+)
+def run(
+    dataset_path,
+    model_names,
+    structures,
+    trajectory_dir,
+    start_idx,
+    end_idx,
+    step,
+    dataset_desc_path,
+    dH_out_path,
+    k=32,
+    cutoff=5.0,
+    h=0.015,
+):
+    # Get descriptors for the dataset. This should exclude the subset of structures used for simulations.
+    # This may take a while if the dataset is large; in that case, we recommend splitting the structures into separate chunks.
+    x_structures = read(dataset_path, index=":")
+    x_desc = get_descriptors(x_structures, k=k, cutoff=cutoff)
+    np.save(dataset_desc_path, x_desc)
+
+    # Run simulations
+    run_simulations(model_names, structures, trajectory_dir)
+
+    # Get entropy for structures along trajectories
+    dH = get_trajectory_entropy(
+        trajectory_dir,
+        start_idx,
+        end_idx,
+        step,
+        dataset_desc_path,
+        k=k,
+        cutoff=cutoff,
+        h=h,
+    )
+    np.save(dH_out_path, dH)
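
Since `run` is a Prefect task and `run_simulations` submits `MD` futures, the benchmark is meant to be driven from a flow so a task runner is available. The following is a minimal sketch only; the paths, the model name, and the window indices are illustrative assumptions (they are not files or settings defined by this commit), and it assumes `benchmarks/energy_conservation/run.py` is importable from the working directory:

from ase.io import read
from prefect import flow

from benchmarks.energy_conservation.run import run as energy_conservation

@flow
def benchmark_flow():
    # Hypothetical inputs: a handful of starting structures plus a reference
    # dataset that excludes those structures.
    structures = read("data/md_subset.extxyz", index=":")          # placeholder path
    energy_conservation(
        dataset_path="data/reference_minus_subset.extxyz",          # placeholder path
        model_names=["MACE-MP(M)"],   # assumed to be a member name of MLIPEnum
        structures=structures,
        trajectory_dir="trajectories",
        start_idx=2,   # e.g. sliding window of 5 with stride 2 -> frames 2, 4, ..., -2
        end_idx=-2,
        step=2,
        dataset_desc_path="dataset_descriptors.npy",
        dH_out_path="delta_entropy.npy",
    )

if __name__ == "__main__":
    benchmark_flow()

The saved array can then be inspected with `np.load("delta_entropy.npy")` for downstream analysis.
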
mlip_arena/tasks/phonon.py CHANGED
@@ -36,14 +36,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from pathlib import Path

 import numpy as np
-from phonopy import Phonopy
-from phonopy.structure.atoms import PhonopyAtoms
+from ase import Atoms
+from ase.calculators.calculator import BaseCalculator
 from prefect import task
 from prefect.cache_policies import INPUTS, TASK_SOURCE
 from prefect.runtime import task_run

-from ase import Atoms
-from ase.calculators.calculator import BaseCalculator
+from mlip_arena.tasks.utils import logger
+
+try:
+    from phonopy import Phonopy
+    from phonopy.structure.atoms import PhonopyAtoms
+except ImportError as e:
+    logger.warning(e)
+    logger.warning(
+        "Phonopy is not installed. Please install it following the instructions at https://phonopy.github.io/phonopy/install.html to use this module."
+    )


 @task(cache_policy=TASK_SOURCE + INPUTS)
@@ -151,9 +159,7 @@ def run(
         filename=Path(outdir, "band.yaml") if outdir is not None else "band.yaml",
     )
     if outdir:
-        phonon.save(
-            Path(outdir, "phonopy.yaml"), settings={"force_constants": True}
-        )
+        phonon.save(Path(outdir, "phonopy.yaml"), settings={"force_constants": True})

     return {
         "phonon": phonon,