import seaborn as sns import matplotlib.pyplot as plt import pandas as pd import numpy as np import scipy.stats as stats import warnings warnings.simplefilter("ignore", category=Warning) def custom_agg(x): result = stats.hmean(x) return result def create_plots(big_df, selected_methods): big_df = big_df[big_df['Method'].isin(selected_methods)] # we want 1-Rouge-P big_df["ROUGE-P Forget"] = 1 - big_df["ROUGE-P Forget"] metrics = list(big_df.columns) metrics.remove("Method") metrics.remove("Model") metrics.remove("Forget Rate") metrics.remove("LR") metrics.remove("Epoch") metrics.remove("Compute") print(metrics) # Apply the custom aggregation function across each row, excluding the first column row_custom_agg = big_df.iloc[:, -len(metrics):].apply(custom_agg, axis=1) # If you want to add these results back to your original DataFrame big_df['MAPO'] = row_custom_agg big_df["LR"] = big_df["LR"].astype(float) # big_df = big_df[big_df["LR"] >= 1e-5] big_df["ROUGE-P Forget"] = 1 - big_df["ROUGE-P Forget"] big_df.reset_index(inplace=True) print(big_df[["Method", "Model", "Forget Rate", "LR", "Epoch", "ROUGE-P Forget", "MAPO"]].round(2).to_markdown()) # print(big_df.groupby(['Method', 'Model', 'Forget Rate']).head()) result = big_df.loc[big_df.groupby(['Method', 'Model', 'Forget Rate'])['MAPO'].idxmax()] print(result[["Method", "Model", "Forget Rate", "LR", "Epoch", "MAPO"]].round(6).to_markdown()) # exit() plot_legend = False fs = 18 if plot_legend else 22 metrics.append("MAPO") # Set the style of the visualization sns.set_theme(style="whitegrid") plt.rcParams['font.family'] = 'Times New Roman' for metric_to_plot in metrics: sub_df = result[big_df["Model"] == "Llama-2-7B"] fig, ax = plt.subplots(figsize=(15, 5)) sns.barplot(x="Method", y=metric_to_plot, hue="Forget Rate", data=sub_df, ax=ax, legend=plot_legend) ax.set_ylabel(metric_to_plot, fontsize=fs) ax.set_ylim(0.0, 1.0) ax.set_xlabel("", fontsize=fs) ax.set_xticklabels(ax.get_xticklabels(), fontsize=fs) ax.set_yticklabels(ax.get_yticklabels(), fontsize=fs-4) ax.spines[['right', 'top']].set_visible(False) if plot_legend: plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), title="Forget Rate (%)") plt.title(metric_to_plot + " on Llama-2-7B", fontsize=fs) plt.tight_layout() plt.savefig(f"barplots/{metric_to_plot}-Llama-2-7B{'legend' if plot_legend else ''}.pdf") print(f"\includegraphics[width=\\textwidth]{{figures/barplots/{metric_to_plot}-Llama-2-7B{'legend' if plot_legend else ''}.pdf}}") plt.close(fig) for model in ["Llama-2-7B", "Phi"]: sub_df = result[result["Model"] == model][["Method", "Forget Rate", "MAPO"]] # print(sub_df.round(6).to_latex(index=False)) sub_df.reset_index(inplace=True) # Reorienting the dataframe sub_df_reoriented = sub_df.pivot(index="Method", columns='Forget Rate', values='MAPO') # Output a latex table of the MAPO values by Method and Forget Rate print(sub_df_reoriented.round(4).to_latex(index=True))