In [None]:
import glob
from pathlib import Path

from tqdm.auto import tqdm

from mlip_arena.models import REGISTRY
from mlip_arena.tasks.stability.input import get_atoms_from_db

# Absolute path of the directory the notebook is executed from; the per-model
# trajectory files are globbed relative to it further down.
RUN_DIR = Path(".").resolve()

In [None]:
# Gather the chemical formula of every non-empty structure yielded from
# "random-mixture.db"; empty structures are skipped.
compositions = []
sizes = []
for atoms in tqdm(get_atoms_from_db("random-mixture.db")):
    if len(atoms) > 0:
        compositions.append(atoms.get_chemical_formula())

In [None]:
import pymatviz as pmv
from matplotlib import pyplot as plt

%matplotlib inline

# Element occurrences over the first 1000 collected formulas, drawn as a
# periodic-table heatmap (log=True) and written to the figures directory.
element_counts = pmv.count_elements(compositions[:1000])

fig = pmv.ptable_heatmap(
    element_counts,
    colormap="GnBu",
    log=True,
    return_type="figure",
)

plt.savefig("../figures/stability-element-counts.pdf")
plt.show()

In [None]:
import numpy as np
import pandas as pd
from ase import Atoms


def get_runtime_stats(traj: list[Atoms], atoms0: Atoms):
    """Summarise timing and thermodynamic observables of an MD trajectory.

    Frames whose potential energy cannot be evaluated, or is not finite, are
    skipped. Elapsed time and step counts are accumulated separately for each
    distinct ``info["restart"]`` value (last valid frame minus first valid
    frame of that block) and then summed.

    Args:
        traj: Trajectory frames. Every valid frame must provide
            ``info["restart"]``, ``info["datetime"]`` and ``info["step"]``;
            the first frame must also provide ``info["target_steps"]``.
        atoms0: Reference structure; centre-of-mass drift is measured
            relative to its centre of mass.

    Returns:
        dict with scalar entries ``natoms``, ``total_time_seconds``,
        ``total_steps``, ``steps_per_second``, ``seconds_per_step``,
        ``seconds_per_step_per_atom``, ``target_steps`` and ``final_step``,
        plus per-valid-frame sequences ``energies``, ``kinetic_energies``,
        ``temperatures``, ``pressures``, ``timestep`` and ``com_drifts``.
        All per-frame sequences have the same length; ``pressures`` holds
        NaN for frames whose stress could not be obtained.

    Raises:
        TypeError: If a frame is not an ``Atoms`` instance.
        IndexError: If ``traj`` is empty.
        KeyError: If a required ``info`` entry is missing.
    """
    restarts, steps, times = [], [], []
    Ts, Ps, Es, KEs = [], [], [], []
    com_drifts = []

    # The reference centre of mass is the same for every frame.
    com0 = atoms0.get_center_of_mass()

    for atoms in tqdm(traj):
        # Explicit check instead of `assert`, which is stripped under -O.
        if not isinstance(atoms, Atoms):
            raise TypeError(f"expected ase.Atoms, got {type(atoms).__name__}")

        try:
            energy = atoms.get_potential_energy()
            if not np.isfinite(energy):
                continue
        except Exception:
            # No usable energy for this frame: skip it.
            continue

        restarts.append(atoms.info["restart"])
        times.append(atoms.info["datetime"])
        steps.append(atoms.info["step"])
        Es.append(energy)
        KEs.append(atoms.get_kinetic_energy())
        Ts.append(atoms.get_temperature())

        # Keep `Ps` aligned with the other per-frame lists: a frame without a
        # usable stress contributes NaN instead of being silently dropped.
        try:
            pressure = atoms.get_stress()[:3].mean()
        except Exception:
            pressure = np.nan
        Ps.append(pressure)

        com_drifts.append((atoms.get_center_of_mass() - com0).tolist())

    restarts = np.array(restarts)
    times = np.array(times)
    steps = np.array(steps)

    total_time_seconds = 0
    total_steps = 0

    # Accumulate elapsed time and advanced steps per restart block.
    for block in np.unique(restarts):
        in_block = restarts == block
        block_times = times[in_block]
        block_steps = steps[in_block]
        total_time_seconds += (block_times[-1] - block_times[0]).total_seconds()
        total_steps += block_steps[-1] - block_steps[0]

    target_steps = traj[0].info["target_steps"]
    natoms = len(traj[0])

    return {
        "natoms": natoms,
        "total_time_seconds": total_time_seconds,
        "total_steps": total_steps,
        "steps_per_second": total_steps / total_time_seconds
        if total_time_seconds != 0
        else 0,
        "seconds_per_step": total_time_seconds / total_steps
        if total_steps != 0
        else float("inf"),
        "seconds_per_step_per_atom": total_time_seconds / total_steps / natoms
        if total_steps != 0 and natoms != 0
        else float("inf"),
        "energies": Es,
        "kinetic_energies": KEs,
        "temperatures": Ts,
        "pressures": Ps,
        "target_steps": target_steps,
        "final_step": steps[-1] if len(steps) != 0 else 0,
        "timestep": steps,
        "com_drifts": com_drifts,
    }


In [None]:
import plotly.colors as pcolors

# Registry models that list "stability" among their GPU tasks.
mlip_methods = [
    name
    for name, meta in REGISTRY.items()
    if "stability" in meta.get("gpu-tasks", [])
]

# Every list-valued attribute of plotly's qualitative colour module is
# treated as a palette; "__all__" is discarded if it was picked up.
all_attributes = dir(pcolors.qualitative)
color_palettes = {
    attr: value
    for attr, value in (
        (attr_name, getattr(pcolors.qualitative, attr_name))
        for attr_name in all_attributes
    )
    if isinstance(value, list)
}
color_palettes.pop("__all__", None)

palette_names = [*color_palettes]
palette_colors = [*color_palettes.values()]
palette_name = "T10"  # "Plotly"
color_sequence = color_palettes[palette_name]  # type: ignore

# One colour per model, cycling through the palette when it is exhausted.
method_color_mapping = {
    method: color_sequence[position % len(color_sequence)]
    for position, method in enumerate(mlip_methods)
}

# NPT

In [None]:
# from huggingface_hub import HfApi
import seaborn as sns
from ase import units
from ase.io import read
from matplotlib import pyplot as plt

# Aggregate the runtime statistics of every NPT trajectory into one DataFrame.
# Per-trajectory frames are collected in a list and concatenated once, rather
# than growing `df` with pd.concat inside the loop.
npt_frames = []

for model in mlip_methods:
    pattern = RUN_DIR / REGISTRY[model]["family"] / f"{model}_*npt.traj"

    # Sorted so that row order (and hence which row a later drop_duplicates
    # keeps) does not depend on filesystem listing order.
    for file in sorted(glob.glob(str(pattern))):
        try:
            traj = read(file, index=":")
        except Exception as e:
            print(f"Error reading {file}: {e}")
            continue

        try:
            stats = get_runtime_stats(traj, atoms0=traj[0])
        except Exception as e:
            print(f"Error processing {file}: {e}")
            continue

        npt_frames.append(
            pd.DataFrame(
                {
                    "model": model,
                    "formula": traj[0].get_chemical_formula(),
                    "normalized_timestep": stats["timestep"]
                    / stats["target_steps"],
                    "normalized_final_step": stats["final_step"]
                    / stats["target_steps"],
                    "pressure": np.array(stats["pressures"]) / units.GPa,
                }
                | stats
            )
        )

# pd.concat rejects an empty list, so fall back to an empty frame.
df = pd.concat(npt_frames, ignore_index=True) if npt_frames else pd.DataFrame()


In [None]:
%matplotlib inline

# import scipy.optimize as opt
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit


# Define the power-law fitting function
def power_law(x, a, n):
    """Return ``a`` times ``x`` raised to the power ``n`` (via ``np.power``)."""
    raised = np.power(x, n)
    return a * raised


# Give the plotted columns the labels that should appear in the legend.
display_names = {
    "final_step": "Total steps",
    "model": "Model",
}
df = df.rename(columns=display_names)

with plt.style.context("default"):
    SMALL_SIZE = 8

    # Two panels side by side: "a" (narrow) shows the share of runs still
    # valid, "o" (wide) shows simulation speed against system size.
    fig, axes = plt.subplot_mosaic(
        """
        ao
        """,
        constrained_layout=True,
        figsize=(6, 3),
        width_ratios=[1, 3],
    )

    iax = "o"
    ax = axes.pop(iax)

    sns.scatterplot(
        data=df,
        x="natoms",
        y="steps_per_second",
        size="Total steps",
        hue="Model",
        ax=ax,
        palette=method_color_mapping,
        sizes=(1, 50),
    )

    # Fit and plot a power-law regression for each model.
    for model, data in df.groupby("Model"):
        # Filter into a new frame rather than mutating the groupby slice.
        data = data.dropna(subset=["steps_per_second"])

        popt, pcov = curve_fit(power_law, data["natoms"], data["steps_per_second"])

        x = np.linspace(data["natoms"].min(), data["natoms"].max(), 100)

        ax.plot(
            x,
            power_law(x, *popt),
            c=method_color_mapping[model],
            linestyle="-",
        )

    ax.set(
        xlabel="Number of atoms",
        xscale="log",
        ylabel="Steps per second",
        yscale="log",
    )
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.grid(alpha=0.25)
    ax.legend(
        loc="upper left", bbox_to_anchor=(1.0, 1.0), fontsize="x-small", frameon=False
    )

    # Number of leading compositions considered; also the denominator of the
    # percentage below. The original defined `fisrt` but read `first`, which
    # raised NameError on a fresh kernel.
    first = 80

    # 50 edges from 0 to 1, i.e. 49 equal-width bins.
    bins = np.linspace(0, 1, 50)

    for k, df_model in df.groupby("Model"):
        ax = axes["a"]

        # One row per formula, restricted to the first `first` compositions.
        df_model = df_model.drop_duplicates(["formula"])
        df_model = df_model[df_model["formula"].isin(compositions[:first])].copy()
        print(k, len(df_model))

        hist, bin_edges = np.histogram(
            df_model["normalized_final_step"], bins=bins, density=False
        )

        # Running total of runs whose final step falls in or before each bin.
        cumulative_population = np.cumsum(hist)

        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        # Runs that ended beyond each bin, as a percentage of `first`.
        sns.lineplot(
            x=bin_centers[:-1],
            y=(cumulative_population[-1] - cumulative_population[:-1]) / first * 100,
            ax=axes["a"],
            color=method_color_mapping[k],
        )

    ax_main = axes["a"]
    ax_main.spines["right"].set_visible(False)
    ax_temp = ax_main.twiny()
    ax_pressure = ax_main.twiny()

    # === Plot styling and range ===
    ax_main.set_xlim(0, 1)
    ax_main.set_ylim(0, 100)
    ax_main.set_ylabel("valid runs (%)")

    # === Top x-axis: Time (ps) ===
    ax_main.set_xticks([0, 1])
    ax_main.set_xticklabels([0, 10])
    ax_main.set_xlabel("Time (ps)")
    ax_main.xaxis.set_label_position("top")
    ax_main.xaxis.tick_top()
    ax_main.spines["top"].set_position(("outward", 5))  # Keep just above plot

    # === Bottom axis: Temperature ===
    ax_temp.set_xlim(ax_main.get_xlim())
    ax_temp.set_xticks([0, 1])
    ax_temp.set_xticklabels(["300 K", "3000 K"])
    ax_temp.xaxis.set_ticks_position("bottom")
    ax_temp.xaxis.set_label_position("bottom")
    ax_temp.spines["right"].set_visible(False)
    ax_temp.spines["top"].set_visible(False)
    ax_temp.spines["bottom"].set_position(("outward", 5))  # Keep just below plot

    # === Lower bottom axis: Pressure ===
    ax_pressure.set_xlim(ax_main.get_xlim())
    ax_pressure.set_xticks([0, 1])
    ax_pressure.set_xticklabels(["0 GPa", "500 GPa"])
    ax_pressure.xaxis.set_ticks_position("bottom")
    ax_pressure.xaxis.set_label_position("bottom")
    ax_pressure.spines["right"].set_visible(False)
    ax_pressure.spines["top"].set_visible(False)
    ax_pressure.spines["bottom"].set_position(("outward", 25))  # Push further down

    # === Clean up main axis ===
    ax_main.legend_ = None

    fig.savefig("stability-and-speed-npt-loglog.pdf", bbox_inches="tight")
    fig.savefig(
        "stability-and-speed-npt-loglog.png", bbox_inches="tight", dpi=330
    )

    # plt.show()

# NVT

In [None]:
import pandas as pd

# from huggingface_hub import HfApi
import seaborn as sns
from ase import units
from ase.io import read
from matplotlib import pyplot as plt

# Aggregate the runtime statistics of every NVT trajectory into one DataFrame.
# Per-trajectory frames are collected in a list and concatenated once, rather
# than growing `df` with pd.concat inside the loop.
nvt_frames = []

for model in mlip_methods:
    pattern = RUN_DIR / REGISTRY[model]["family"] / f"{model}_*nvt.traj"

    # Sorted so that row order (and hence which row a later drop_duplicates
    # keeps) does not depend on filesystem listing order.
    for file in sorted(glob.glob(str(pattern))):
        try:
            traj = read(file, index=":")
        except Exception as e:
            print(f"Error reading {file}: {e}")
            continue

        try:
            stats = get_runtime_stats(traj, atoms0=traj[0])
        except Exception as e:
            print(f"Error processing {file}: {e}")
            continue

        nvt_frames.append(
            pd.DataFrame(
                {
                    "model": model,
                    "formula": traj[0].get_chemical_formula(),
                    "normalized_timestep": stats["timestep"]
                    / stats["target_steps"],
                    "normalized_final_step": stats["final_step"]
                    / stats["target_steps"],
                    "pressure": np.array(stats["pressures"]) / units.GPa,
                }
                | stats
            )
        )

# pd.concat rejects an empty list, so fall back to an empty frame.
df = pd.concat(nvt_frames, ignore_index=True) if nvt_frames else pd.DataFrame()


In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

# import scipy.optimize as opt
import seaborn as sns
from scipy.optimize import curve_fit


# Define the power-law fitting function
def power_law(x, a, n):
    """Return ``a`` times ``x`` raised to the power ``n`` (via ``np.power``)."""
    raised = np.power(x, n)
    return a * raised


# Give the plotted columns the labels that should appear in the legend.
display_names = {
    "final_step": "Total steps",
    "model": "Model",
}
df = df.rename(columns=display_names)

with plt.style.context("default"):
    # Two panels side by side: "a" (narrow) shows the share of runs still
    # valid, "o" (wide) shows simulation speed against system size.
    fig, axes = plt.subplot_mosaic(
        """
        ao
        """,
        constrained_layout=True,
        figsize=(6, 3),
        width_ratios=[1, 3],
    )

    iax = "o"
    ax = axes.pop(iax)

    sns.scatterplot(
        data=df,
        x="natoms",
        y="steps_per_second",
        size="Total steps",
        hue="Model",
        ax=ax,
        palette=method_color_mapping,
        sizes=(1, 50),
    )

    # Fit and plot a power-law regression for each model.
    for model, data in df.groupby("Model"):
        # Filter into a new frame rather than mutating the groupby slice.
        data = data.dropna(subset=["steps_per_second"])

        popt, pcov = curve_fit(power_law, data["natoms"], data["steps_per_second"])

        x = np.linspace(data["natoms"].min(), data["natoms"].max(), 100)

        ax.plot(
            x,
            power_law(x, *popt),
            c=method_color_mapping[model],
            linestyle="-",
        )

    ax.set(
        xlabel="Number of atoms",
        xscale="log",
        ylabel="Steps per second",
        yscale="log",
    )
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.grid(alpha=0.25)
    ax.legend(
        loc="upper left", bbox_to_anchor=(1.0, 1.0), fontsize="x-small", frameon=False
    )

    # Number of leading compositions considered; also the denominator of the
    # percentage below.
    first = 120

    # 50 edges from 0 to 1, i.e. 49 equal-width bins.
    bins = np.linspace(0, 1, 50)

    for k, df_model in df.groupby("Model"):
        ax = axes["a"]

        # One row per formula, restricted to the first `first` compositions.
        df_model = df_model.drop_duplicates(["formula"])
        df_model = df_model[df_model["formula"].isin(compositions[:first])].copy()

        hist, bin_edges = np.histogram(
            df_model["normalized_final_step"], bins=bins, density=False
        )

        # Running total of runs whose final step falls in or before each bin.
        cumulative_population = np.cumsum(hist)

        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        # Runs that ended beyond each bin, as a percentage of `first`.
        sns.lineplot(
            x=bin_centers[:-1],
            y=(cumulative_population[-1] - cumulative_population[:-1]) / first * 100,
            ax=axes["a"],
            color=method_color_mapping[k],
        )

    ax_main = axes["a"]
    ax_main.spines["right"].set_visible(False)
    ax_temp = ax_main.twiny()

    # === Plot styling and range ===
    ax_main.set_xlim(0, 1)
    ax_main.set_ylabel("valid runs (%)")

    # === Top x-axis: Time (ps) ===
    ax_main.set_xticks([0, 1])
    ax_main.set_xticklabels([0, 10])
    ax_main.set_xlabel("Time (ps)")
    ax_main.xaxis.set_label_position("top")
    ax_main.xaxis.tick_top()
    ax_main.spines["top"].set_position(("outward", 5))  # Keep just above plot

    # === Bottom axis: Temperature ===
    ax_temp.set_xlim(ax_main.get_xlim())
    ax_temp.set_xticks([0, 1])
    ax_temp.set_xticklabels(["300 K", "3000 K"])
    ax_temp.xaxis.set_ticks_position("bottom")
    ax_temp.xaxis.set_label_position("bottom")
    ax_temp.spines["right"].set_visible(False)
    ax_temp.spines["top"].set_visible(False)
    ax_temp.spines["bottom"].set_position(("outward", 5))  # Keep just below plot

    # === Clean up main axis ===
    ax_main.legend_ = None

    fig.savefig("stability-and-speed-nvt-loglog.pdf", bbox_inches="tight")
    fig.savefig(
        "stability-and-speed-nvt-loglog.png", bbox_inches="tight", dpi=330
    )

    # plt.show()