Spaces:

atomind
/

mlip-arena

Running

mlip-arena / examples /eos_bulk /plot.py

Yuan (Cyrus) Chiang

Add plots, tex files, and missing MOF structure (#60)

79edee4 unverified about 19 hours ago

3.76 kB

	from pathlib import Path

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	from ase.db import connect

	from mlip_arena.models import REGISTRY as MODELS

	# Absolute path of the directory that contains this script.
	DATA_DIR = Path(__file__).parent.absolute()

	# Qualitative matplotlib palette; its discrete colors keep many overlaid
	# curves distinguishable from one another.
	palette_name = "tab10"
	color_sequence = plt.get_cmap(palette_name).colors

	# Names of the registered models whose metadata lists "eos_bulk" among
	# its "gpu-tasks" entries (models without that key are excluded).
	valid_models = [
	    name
	    for name, info in MODELS.items()
	    if "eos_bulk" in info.get("gpu-tasks", [])
	]

	def load_wbm_structures():
	    """
	    Lazily yield the WBM structures stored in an ASE DB file.

	    The database is ``wbm_structures.db`` in the parent directory of
	    ``DATA_DIR``. Each selected row is converted with
	    ``row.toatoms(add_additional_information=True)`` and yielded one at a
	    time; the connection stays open only while the generator is being
	    consumed.
	    """
	    db_path = DATA_DIR.parent / "wbm_structures.db"
	    with connect(db_path) as database:
	        yield from (
	            entry.toatoms(add_additional_information=True)
	            for entry in database.select()
	        )

	# ---------------------------------------------------------------------------
	# Draw one small panel per model showing every equation-of-state curve
	# (energy-delta-per-volume-b0 against volume-ratio) found in that model's
	# "<model>_processed.parquet" file, then save the grid as PNG and PDF.
	# ---------------------------------------------------------------------------

	# Font sizes (in points) used for titles, labels and ticks.
	SMALL_SIZE = 6
	MEDIUM_SIZE = 8
	LARGE_SIZE = 10

	# Keep only the models whose processed results are actually on disk, so a
	# single missing file does not abort the whole figure.
	plottable_models = []
	for model_name in valid_models:
	    if (DATA_DIR / f"{model_name}_processed.parquet").exists():
	        plottable_models.append(model_name)
	    else:
	        print(f"Warning: skipping {model_name}: no processed parquet file found")

	if not plottable_models:
	    # Zero panels would mean a zero-height figure; stop with a clear message.
	    raise SystemExit("No processed eos_bulk results found; nothing to plot.")

	# Set up the grid layout
	n_models = len(plottable_models)
	n_cols = 4
	n_rows = (n_models + n_cols - 1) // n_cols  # ceiling division

	# Figure height grows with the number of rows; constrained_layout keeps the
	# titles and labels of neighbouring panels from overlapping.
	fig = plt.figure(
	    figsize=(6, 1.25 * n_rows),
	    constrained_layout=True,
	)

	# One axes per model, filled row by row (subplot indices are 1-based).
	axes = [fig.add_subplot(n_rows, n_cols, i + 1) for i in range(n_models)]

	for i, (ax, model_name) in enumerate(zip(axes, plottable_models)):
	    df = pd.read_parquet(DATA_DIR / f"{model_name}_processed.parquet")

	    n_curves = 0
	    curves = zip(df["volume-ratio"], df["energy-delta-per-volume-b0"])
	    for j, (vol_strain, energy_delta) in enumerate(curves):
	        # Rows whose entries are not sequences carry no curve; skip them.
	        if not (
	            isinstance(vol_strain, (list, np.ndarray))
	            and isinstance(energy_delta, (list, np.ndarray))
	        ):
	            continue
	        # The color follows the row position (skipped rows included), so a
	        # given row always maps to the same palette entry.
	        ax.plot(
	            vol_strain,
	            energy_delta,
	            color=color_sequence[j % len(color_sequence)],
	            linewidth=1,
	            alpha=0.9,
	        )
	        n_curves += 1

	    # Title shows the model name and how many curves were drawn.
	    ax.set_title(f"{model_name} ({n_curves})", fontsize=MEDIUM_SIZE)

	    # Only the leftmost column carries a y-label.
	    if i % n_cols == 0:
	        ax.set_ylabel("$\\frac{\\Delta E}{B V_0}$", fontsize=MEDIUM_SIZE)

	    # A panel gets an x-label when no other panel sits directly below it,
	    # i.e. when it is among the last n_cols panels. This also covers the
	    # panels above the empty slots of an incomplete final row.
	    if i >= n_models - n_cols:
	        ax.set_xlabel("$V/V_0$", fontsize=MEDIUM_SIZE)

	    # Dashed guide line at V/V_0 = 1.
	    ax.axvline(x=1, linestyle="--", color="gray", alpha=0.7)
	    ax.tick_params(axis="both", which="major", labelsize=MEDIUM_SIZE)

	    # Identical limits on every panel so curves are directly comparable.
	    ax.set_xlim(0.8, 1.2)  # Adjust these as needed
	    ax.set_ylim(-0.02, 0.1)

	# Save the figure with all plots
	fig.savefig(DATA_DIR / "eos-bulk-grid.png", dpi=300, bbox_inches="tight")
	fig.savefig(DATA_DIR / "eos-bulk-grid.pdf", bbox_inches="tight")
	# plt.show()