Yuan (Cyrus) Chiang
Add more benchmark results and rename directory (#63)
fdf446a unverified
import glob
import pickle
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pymatgen.core import Element
from mlip_arena.models import REGISTRY
# Folder that holds this script; the reference tables are read from here and
# the pickled figures are written back here.
DATA_DIR = Path(__file__).parent

# Models to compare; one subplot panel is drawn per entry, in this order.
mlip_models = [
    "MACE-MP(M)",
    "MatterSim",
    "ORBv2",
    "M3GNet",
    "CHGNet",
    "SevenNet",
]

# Reference tables for the fcc and hcp structures. The script reads their
# `symbol` and `e_vacmig` columns.
fcc_pbe, hcp_pbe = (
    pd.read_csv(DATA_DIR / table_name)
    for table_name in ("Table-A1-fcc.csv", "Table-A2-hcp.csv")
)
# fcc
# Gather the fitted NEB path of every (model, element) pair listed in the fcc
# reference table. Pairs without a result file are kept as rows with empty
# paths so the plotting step can count them as missing.
#
# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: concatenating onto a growing DataFrame inside the loop copies the whole
# frame on every iteration and triggers pandas' empty-frame concat warning.
fcc_rows = []
for model in mlip_models:
    # NOTE(review): this path is relative to the current working directory,
    # not to DATA_DIR -- confirm the script is always launched from the folder
    # that contains the per-family result directories.
    out_dir = Path(REGISTRY[model]["family"])

    for _, row in fcc_pbe.iterrows():
        symbol = row["symbol"]
        # Only the part before the first underscore names the element.
        element = symbol.split("_")[0]

        if Element(element).is_noble_gas:
            continue

        files = glob.glob(str(out_dir / f"{model}-fcc-{element}108.pkl"))

        if not files:
            print("skip", model, symbol)
            fit_path, fit_energies = [], []
        else:
            # NOTE: unpickling executes arbitrary code; only load result files
            # produced by trusted runs.
            with open(files[0], "rb") as f:
                result = pickle.load(f)
            forcefit = result["neb"]["forcefit"]
            fit_path, fit_energies = forcefit.fit_path, forcefit.fit_energies

        fcc_rows.append(
            {
                "symbol": symbol,
                "model": model,
                "pbe_e_vacmig": row["e_vacmig"],
                "fit_path": fit_path,
                "fit_energies": fit_energies,
            }
        )

# Explicit columns keep the frame well-formed even when no row was collected.
results_df = pd.DataFrame(
    fcc_rows,
    columns=["symbol", "model", "pbe_e_vacmig", "fit_path", "fit_energies"],
)
# Draw one panel per model with the normalized fcc energy profiles of all
# elements, annotate summary statistics, and pickle the figure.
nrows = 2
ncols = len(mlip_models) // nrows

# One color per model, taken from Matplotlib's default property cycle
# ("C0", "C1", ...). The plotting loop below looks colors up by model name.
method_color_mapping = {name: f"C{k}" for k, name in enumerate(mlip_models)}

fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(6, 4),
    sharex=True,
    sharey=True,
    constrained_layout=True,
    dpi=300,
)

for i, (ax, model) in enumerate(zip(axes.ravel(), mlip_models, strict=False)):
    filtered_df = results_df[results_df["model"] == model]

    asymmetries = []
    middle_deviations = []

    for _, row in filtered_df.iterrows():
        # Skip elements without a result file or without a reference value.
        if len(row["fit_path"]) == 0 or pd.isna(row["pbe_e_vacmig"]):
            continue

        # Path coordinate scaled to end at 1; energies scaled by the
        # reference value from the table.
        x = np.asarray(row["fit_path"]) / np.max(row["fit_path"])
        y = np.asarray(row["fit_energies"]) / row["pbe_e_vacmig"]

        # Discard curves whose normalized energy exceeds 10 in magnitude;
        # they are neither plotted nor included in the statistics.
        if np.abs(y).max() > 10:
            continue

        # Compare the first half of the profile with the mirrored second half,
        # truncated to a common length.
        left_side = y[x <= 0.5]
        right_side = y[x >= 0.5][::-1]
        min_len = min(len(left_side), len(right_side))
        asymmetry = np.abs(left_side[:min_len] - right_side[:min_len]).mean()

        asymmetries.append(asymmetry)
        # Deviation of the profile maximum from 1 (the normalized reference).
        middle_deviations.append(y.max() - 1)

        ax.plot(
            x,
            y,
            alpha=0.5,
            color=method_color_mapping[model],
            label=model,
        )

    asymmetries = np.array(asymmetries)
    middle_deviations = np.array(middle_deviations)

    # Rows with a reference value that did not end up in the statistics.
    n_missing = (
        len(filtered_df)
        - len(asymmetries)
        - filtered_df["pbe_e_vacmig"].isna().sum()
    )
    # Avoid NumPy's "mean of empty slice" warning when nothing was plotted;
    # the annotation still shows "nan" in that case.
    if asymmetries.size:
        asym_mean = asymmetries.mean()
        mape_at_max = np.abs(middle_deviations).mean() * 100
    else:
        asym_mean = mape_at_max = np.nan

    ax.text(
        0.05,
        0.95,
        "\n".join(
            [
                f"Miss: {n_missing}",
                f"Asym: {asym_mean:.3f}",
                f"MAPE@max: {mape_at_max:.1f}",
            ]
        ),
        transform=ax.transAxes,
        ha="left",
        va="top",
        fontsize="small",
    )

    ax.set(
        title=model,
        # Label the x axis on the bottom row only and the y axis on the first
        # column only.
        xlabel="Normalized path" if i >= len(mlip_models) - ncols else None,
        ylabel="Normalized energy" if i % ncols == 0 else None,
        ylim=(-0.1, 2),
    )

with open(DATA_DIR / "fcc.pkl", "wb") as f:
    pickle.dump(fig, f)
# hcp
# Gather the fitted NEB path of every (model, element) pair listed in the hcp
# reference table. Pairs without a result file are kept as rows with empty
# paths so the plotting step can count them as missing.
#
# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: concatenating onto a growing DataFrame inside the loop copies the whole
# frame on every iteration and triggers pandas' empty-frame concat warning.
hcp_rows = []
for model in mlip_models:
    # NOTE(review): this path is relative to the current working directory,
    # not to DATA_DIR -- confirm the script is always launched from the folder
    # that contains the per-family result directories.
    out_dir = Path(REGISTRY[model]["family"])

    for _, row in hcp_pbe.iterrows():
        symbol = row["symbol"]
        # Only the part before the first underscore names the element.
        element = symbol.split("_")[0]

        if Element(element).is_noble_gas:
            continue

        files = glob.glob(str(out_dir / f"{model}-hcp-{element}36.pkl"))

        if not files:
            print("skip", model, symbol)
            fit_path, fit_energies = [], []
        else:
            # NOTE: unpickling executes arbitrary code; only load result files
            # produced by trusted runs.
            with open(files[0], "rb") as f:
                result = pickle.load(f)
            forcefit = result["neb"]["forcefit"]
            fit_path, fit_energies = forcefit.fit_path, forcefit.fit_energies

        hcp_rows.append(
            {
                "symbol": symbol,
                "model": model,
                "pbe_e_vacmig": row["e_vacmig"],
                "fit_path": fit_path,
                "fit_energies": fit_energies,
            }
        )

# Explicit columns keep the frame well-formed even when no row was collected.
results_df = pd.DataFrame(
    hcp_rows,
    columns=["symbol", "model", "pbe_e_vacmig", "fit_path", "fit_energies"],
)
# Draw one panel per model with the normalized hcp energy profiles of all
# elements, annotate summary statistics, and pickle the figure.
nrows = 2
ncols = len(mlip_models) // nrows
# NOTE(review): `threshold` is not read anywhere in this section; kept in case
# code outside this view depends on it.
threshold = 0.10

# One color per model, taken from Matplotlib's default property cycle
# ("C0", "C1", ...). The plotting loop below looks colors up by model name.
method_color_mapping = {name: f"C{k}" for k, name in enumerate(mlip_models)}

fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(6, 4),
    sharex=True,
    sharey=True,
    constrained_layout=True,
    dpi=300,
)

for i, (ax, model) in enumerate(zip(axes.ravel(), mlip_models, strict=False)):
    filtered_df = results_df[results_df["model"] == model]

    asymmetries = []
    middle_deviations = []

    for _, row in filtered_df.iterrows():
        # Skip elements without a result file or without a reference value.
        if len(row["fit_path"]) == 0 or pd.isna(row["pbe_e_vacmig"]):
            continue

        # Path coordinate scaled to end at 1; energies scaled by the
        # reference value from the table.
        x = np.asarray(row["fit_path"]) / np.max(row["fit_path"])
        y = np.asarray(row["fit_energies"]) / row["pbe_e_vacmig"]

        # Discard curves whose normalized energy exceeds 10 in magnitude;
        # they are neither plotted nor included in the statistics.
        if np.abs(y).max() > 10:
            continue

        # Compare the first half of the profile with the mirrored second half,
        # truncated to a common length.
        left_side = y[x <= 0.5]
        right_side = y[x >= 0.5][::-1]
        min_len = min(len(left_side), len(right_side))
        asymmetry = np.abs(left_side[:min_len] - right_side[:min_len]).mean()

        asymmetries.append(asymmetry)
        # Deviation of the profile maximum from 1 (the normalized reference).
        middle_deviations.append(y.max() - 1)

        ax.plot(
            x,
            y,
            alpha=0.5,
            color=method_color_mapping[model],
            label=model,
        )

    asymmetries = np.array(asymmetries)
    middle_deviations = np.array(middle_deviations)

    # Rows with a reference value that did not end up in the statistics.
    n_missing = (
        len(filtered_df)
        - len(asymmetries)
        - filtered_df["pbe_e_vacmig"].isna().sum()
    )
    # Avoid NumPy's "mean of empty slice" warning when nothing was plotted;
    # the annotation still shows "nan" in that case.
    if asymmetries.size:
        asym_mean = asymmetries.mean()
        mape_at_max = np.abs(middle_deviations).mean() * 100
    else:
        asym_mean = mape_at_max = np.nan

    ax.text(
        0.05,
        0.95,
        "\n".join(
            [
                f"Miss: {n_missing}",
                f"Asym: {asym_mean:.3f}",
                f"MAPE@max: {mape_at_max:.1f}",
            ]
        ),
        transform=ax.transAxes,
        ha="left",
        va="top",
        fontsize="small",
    )

    ax.set(
        title=model,
        # Label the x axis on the bottom row only and the y axis on the first
        # column only.
        xlabel="Normalized path" if i >= len(mlip_models) - ncols else None,
        ylabel="Normalized energy" if i % ncols == 0 else None,
        ylim=(-0.1, 2),
    )

with open(DATA_DIR / "hcp.pkl", "wb") as f:
    pickle.dump(fig, f)