""" Widom insertion workflow to calculate Henry coefficient and heat of adsorption for a given MOF structure and gas molecule. This script is heavily adapted from the `DAC-SIM `_ package. Please cite the original work if you use this script. References ~~~~~~~~~~~ - Lim, Y., Park, H., Walsh, A., & Kim, J. (2024). Accelerating CO₂ Direct Air Capture Screening for Metal-Organic Frameworks with a Transferable Machine Learning Force Field. """ from collections import defaultdict from pathlib import Path from typing import IO, Any import numpy as np from prefect import flow, task from prefect.cache_policies import INPUTS, TASK_SOURCE from prefect.futures import wait from prefect.logging import get_run_logger from prefect.runtime import task_run from prefect.states import State from tqdm.auto import tqdm from ase import Atoms, units from ase.atoms import Atoms from ase.build import molecule from ase.filters import Filter from ase.io.trajectory import Trajectory, TrajectoryWriter from ase.optimize.optimize import Optimizer from ase.calculators.calculator import BaseCalculator from mlip_arena.models import MLIPEnum from mlip_arena.tasks.optimize import run as OPT from mlip_arena.tasks.utils import get_calculator, logger from .grid import get_accessible_positions from .input import get_atoms_from_db def add_molecule(gas: Atoms, rotate: bool = True, translate: tuple = None) -> Atoms: """ Add a molecule to the simulation cell Parameters ---------- gas : Atoms The gas molecule to add rotate : bool, optional If True, rotate the molecule randomly, by default True translate : tuple, optional The translation of the molecule, by default None Returns ------- Atoms The gas molecule added to the simulation cell Raises ------ ValueError If the translate is not a 3-tuple, raise an error Examples -------- >>> from ml_mc.utils import molecule, add_gas >>> gas = molecule('H2O') >>> gas = add_gas(gas, rotate=True, translate=(0, 0, 0)) """ gas = gas.copy() if rotate: angle = np.random.rand() * 360 axis = np.random.rand(3) gas.rotate(v=axis, a=angle) if translate is not None: if len(translate) != 3: raise ValueError("translate must be a 3-tuple") gas.translate(translate) return gas def get_atomic_density(atoms: Atoms) -> float: """ Calculate atomic density of the atoms. Parameters ---------- atoms : Atoms The Atoms object to operate on. Returns ------- float Atomic density of the atoms in kg/m³. """ volume = atoms.get_volume() * 1e-30 # Convert ų to m³ total_mass = sum(atoms.get_masses()) * units._amu # Convert amu to kg return total_mass / volume def _generate_task_run_name(): task_name = task_run.task_name parameters = task_run.parameters structure = parameters["structure"] gas = parameters["gas"] calculator = parameters["calculator"] return f"{task_name}: {structure.get_chemical_formula()} + {gas.get_chemical_formula()} - {calculator}" @task( name="Widom Insertion", task_run_name=_generate_task_run_name, cache_policy=TASK_SOURCE + INPUTS, ) def widom_insertion( # init structure: Atoms, gas: Atoms, calculator: BaseCalculator, optimizer: Optimizer | str = "FIRE", optimizer_kwargs: dict | None = None, filter: Filter | str | None = "FrechetCell", filter_kwargs: dict | None = None, criterion: dict | None = dict(fmax=0.05, steps=50), temperature: float = 300, init_structure_optimize_loops: int = 10, init_gas_optimize: bool = True, traj_file: str | Path | None = None, # run num_insertions: int = 5000, grid_spacing: float = 0.15, cutoff_distance: float = 1.50, min_interplanar_distance: float = 6.0, fold: int = 3, random_seed: int | None = None, ) -> dict[str, Any] | State: """ Run the Widom insertion algorithm to calculate the Henry coefficient and heat of adsorption. Parameters ---------- num_insertions : int, default=5000 Number of random insertions of the gas molecule during simulation. grid_spacing : float, default=0.15 Spacing of the grid for possible gas insertion points, in angstroms. cutoff_distance : float, default=1.50 When the distance between framework atoms and the gas molecule is less than this value, the insertion is rejected. In angstroms. min_interplanar_distance : float, default=6.0 When the interplanar distance of the framework is less than this value, a supercell is constructed. In angstroms. fold : int, default=3 Number of repetitions of Widom insertion to improve statistics. random_seed : int, optional Seed for the random number generator for reproducibility. Returns ------- Dict[str, Any] Dictionary containing the calculated Henry coefficient (mol/kg Pa), averaged interaction energy (eV), and heat of adsorption (kJ/mol) over the number of folds. """ structure = structure.copy() gas = gas.copy() # Optimize structure and gas molecule while init_structure_optimize_loops > 0: logger.info("Optimizing cell") state = OPT( atoms=structure, calculator=calculator, optimizer=optimizer, optimizer_kwargs=optimizer_kwargs, filter=filter, filter_kwargs=filter_kwargs, criterion=criterion, return_state=True, ) if state.is_failed(): return state result = state.result(raise_on_failure=False) structure = result["atoms"] if result["converged"]: break logger.info("Optimizing atoms with fixed cell") state = OPT( atoms=structure, calculator=calculator, optimizer=optimizer, optimizer_kwargs=optimizer_kwargs, filter=None, filter_kwargs=None, criterion=criterion, return_state=True, ) if state.is_failed(): return state result = state.result(raise_on_failure=False) structure = result["atoms"] if result["converged"]: break init_structure_optimize_loops -= 1 if init_gas_optimize: logger.info("Optimizing gas molecule") state = OPT( atoms=gas, calculator=calculator, optimizer=optimizer, optimizer_kwargs=optimizer_kwargs, filter=None, criterion=criterion, return_state=True, ) if state.is_failed(): return state gas = state.result(raise_on_failure=False)["atoms"] # Calculate accessible positions ret = get_accessible_positions( structure=structure, grid_spacing=grid_spacing, cutoff_distance=cutoff_distance, min_interplanar_distance=min_interplanar_distance, ) pos_grid = ret["pos_grid"] idx_accessible_pos = ret["idx_accessible_pos"] structure = ret["structure"] # supercell structure if necessary logger.info( f"Number of accessible positions: {len(idx_accessible_pos)} out of total {len(pos_grid)}" ) calc = calculator # Calculate energies for structure and gas energy_structure = calc.get_potential_energy(structure) energy_gas = calc.get_potential_energy(gas) # Set random seed if provided if random_seed is not None: np.random.seed(random_seed) logger.info(f"Setting random seed: {random_seed}") if traj_file is not None: traj_file = Path(traj_file) traj_file.parent.mkdir(parents=True, exist_ok=True) # TODO: checkpoint and restart traj = Trajectory(traj_file, "a") else: traj = None # Run Widom insertion algorithm results = defaultdict(list) for ifold in range(fold): nsteps = 0 np.random.shuffle(idx_accessible_pos) interaction_energies = np.zeros(num_insertions) pbar = tqdm(total=num_insertions, desc=f"Fold {ifold + 1}/{fold}") for rand_idx in idx_accessible_pos: # assert rand_idx in idx_accessible_pos if nsteps >= num_insertions: break # Add gas molecule to the accessible position pos = pos_grid[rand_idx] added_gas = add_molecule(gas, rotate=True, translate=pos) structure_with_gas = structure + added_gas structure_with_gas.wrap() # wrap atoms to unit cell # Calculate interaction energy structure_with_gas.calc = calc total_energy = structure_with_gas.get_potential_energy() # [eV] interaction_energy = total_energy - energy_structure - energy_gas # [eV] boltzmann_factor = np.exp( -interaction_energy / (temperature * units._k / units._e) ) # Handle exponential overflow that can cause numerical instability max_exp_arg = 700 # np.exp(700) is close to the max float64 if boltzmann_factor > np.exp(max_exp_arg): logger.warning( f"Exponential overflow detected. Rejecting this step and retrying." ) continue interaction_energies[nsteps] = interaction_energy nsteps += 1 pbar.update(1) # Write trajectory if isinstance(traj, TrajectoryWriter): traj.write(structure_with_gas) pbar.close() assert nsteps == num_insertions, "Cannot reach the number of insertions due to too many invalid steps." # Calculate ensemble averages properties # units._e [J/eV], units._k [J/K], units._k / units._e # [eV/K] boltzmann_factors = np.exp( -interaction_energies / (temperature * units._k / units._e) ) # KH = / (R * T) atomic_density = get_atomic_density(structure) # [kg / m^3] kh = ( boltzmann_factors.sum() / num_insertions / (units._k * units._Nav) # R = [J / mol K] = [Pa m^3 / mol K] / temperature # T = [K] -> [mol/ m^3 Pa] / atomic_density # = [kg / m^3] -> [mol / kg Pa] ) # [mol/kg Pa] # U = < E * exp(-E/RT) > / # [eV] u = (interaction_energies * boltzmann_factors).sum() / boltzmann_factors.sum() # Qst = U - RT # [kJ/mol] qst = (u * units._e - units._k * temperature) * units._Nav * 1e-3 results["henry_coefficient"].append(kh) results["averaged_interaction_energy"].append(u) results["heat_of_adsorption"].append(qst) return results @flow def run( db_path: Path | str = "mofs.db", ): states = [] for model in MLIPEnum: for atoms in tqdm(get_atoms_from_db(db_path)): state = widom_insertion.submit( atoms, molecule("CO2"), calculator=get_calculator( model, dispersion=True, ), return_state=True, ) states.append(state) wait(states) return [s.result(raise_on_failture=False) for s in states if s.is_completed()]