# Extracted from a "Spaces" file view (status: Running; file size: 3,263 bytes; revision cf8c271).
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as stats
import warnings
# Silence every warning category for the whole process (``Warning`` is the
# base class of all warning categories), so nothing emitted by the libraries
# used below is shown.
warnings.simplefilter(action="ignore", category=Warning)
def custom_agg(x):
    """Return the harmonic mean of ``x`` as computed by ``scipy.stats.hmean``."""
    return stats.hmean(x)
def create_plots(big_df, selected_methods):
    """Score runs with MAPO, pick the best run per group, and emit plots/tables.

    For every row of ``big_df`` whose ``Method`` is in ``selected_methods`` a
    ``MAPO`` score is computed as ``custom_agg`` (harmonic mean) over all
    metric columns, where "ROUGE-P Forget" enters the aggregation as
    ``1 - value``. The row with the highest MAPO is then kept for each
    (Method, Model, Forget Rate) group.

    Args:
        big_df: DataFrame containing the columns "Method", "Model",
            "Forget Rate", "LR", "Epoch", "Compute" and "ROUGE-P Forget";
            every remaining column is treated as a metric. The caller's
            frame is not modified.
        selected_methods: Values of the "Method" column to keep (passed to
            ``Series.isin``).

    Returns:
        None. As side effects the function prints the metric names, two
        Markdown tables and, per model in ("Llama-2-7B", "Phi"), a LaTeX
        table of MAPO by Method and Forget Rate; it also writes one PDF bar
        plot per metric (plus MAPO) for "Llama-2-7B" into ``barplots/`` and
        prints the matching ``\\includegraphics`` line.

    Raises:
        ValueError: If one of the six identifier columns is missing from
            ``big_df`` (raised by ``list.remove``).
    """
    # Local import so this block does not depend on a file-level `import os`.
    import os

    id_columns = ["Method", "Model", "Forget Rate", "LR", "Epoch", "Compute"]
    group_columns = ["Method", "Model", "Forget Rate"]
    plot_model = "Llama-2-7B"
    table_models = ["Llama-2-7B", "Phi"]
    output_dir = "barplots"

    # Work on an explicit copy: columns are added below, and assigning onto a
    # filtered slice is ambiguous (view vs. copy) in pandas.
    big_df = big_df[big_df["Method"].isin(selected_methods)].copy()

    # Everything that is not an identifier column is a metric.
    metrics = list(big_df.columns)
    for column in id_columns:
        metrics.remove(column)
    print(metrics)

    # Aggregate on a scratch copy selected by column name, so the stored
    # "ROUGE-P Forget" values are never flipped (and flipped back) in place
    # and the result does not depend on the metric columns being last.
    scores = big_df[metrics].copy()
    scores["ROUGE-P Forget"] = 1 - scores["ROUGE-P Forget"]  # we want 1-Rouge-P
    big_df["MAPO"] = scores.apply(custom_agg, axis=1)

    big_df["LR"] = big_df["LR"].astype(float)
    # Gives unique row labels (the old index becomes an "index" column).
    big_df = big_df.reset_index()
    print(
        big_df[["Method", "Model", "Forget Rate", "LR", "Epoch", "ROUGE-P Forget", "MAPO"]]
        .round(2)
        .to_markdown()
    )

    # Best run (highest MAPO) for each Method / Model / Forget Rate group.
    best_rows = big_df.groupby(group_columns)["MAPO"].idxmax()
    result = big_df.loc[best_rows]
    print(result[["Method", "Model", "Forget Rate", "LR", "Epoch", "MAPO"]].round(6).to_markdown())

    plot_legend = False
    fs = 18 if plot_legend else 22
    suffix = "legend" if plot_legend else ""
    plotted_metrics = metrics + ["MAPO"]

    # Set the style of the visualization
    sns.set_theme(style="whitegrid")
    plt.rcParams["font.family"] = "Times New Roman"

    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Loop-invariant: filter `result` with a mask built from `result` itself
    # (a mask built from `big_df` has a different index and must be realigned).
    plot_rows = result[result["Model"] == plot_model]

    for metric_to_plot in plotted_metrics:
        fig, ax = plt.subplots(figsize=(15, 5))
        sns.barplot(
            x="Method",
            y=metric_to_plot,
            hue="Forget Rate",
            data=plot_rows,
            ax=ax,
            legend=plot_legend,
        )
        ax.set_ylabel(metric_to_plot, fontsize=fs)
        ax.set_ylim(0.0, 1.0)
        ax.set_xlabel("", fontsize=fs)
        # Resize tick labels without replacing them; set_*ticklabels should
        # only be used after the tick positions have been fixed.
        ax.tick_params(axis="x", labelsize=fs)
        ax.tick_params(axis="y", labelsize=fs - 4)
        ax.spines[["right", "top"]].set_visible(False)
        if plot_legend:
            ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), title="Forget Rate (%)")
        ax.set_title(metric_to_plot + " on " + plot_model, fontsize=fs)
        fig.tight_layout()
        fig.savefig(f"{output_dir}/{metric_to_plot}-{plot_model}{suffix}.pdf")
        # Backslashes are escaped explicitly; the printed text is unchanged.
        print(
            f"\\includegraphics[width=\\textwidth]"
            f"{{figures/{output_dir}/{metric_to_plot}-{plot_model}{suffix}.pdf}}"
        )
        plt.close(fig)

    for model in table_models:
        mapo_rows = result.loc[result["Model"] == model, ["Method", "Forget Rate", "MAPO"]]
        # Reorienting the dataframe: one row per Method, one column per Forget Rate
        mapo_table = mapo_rows.pivot(index="Method", columns="Forget Rate", values="MAPO")
        # Output a latex table of the MAPO values by Method and Forget Rate
        print(mapo_table.round(4).to_latex(index=True))