Spaces:
Running
Running
pratyushmaini
committed on
Commit
•
cf8c271
1
Parent(s):
987cfd4
big df
Browse files- app.py +1 -14
- app_bkp.py +0 -59
- app_old.py +0 -128
- plotter.py +80 -0
app.py
CHANGED
@@ -20,20 +20,7 @@ def change_version(version):
|
|
20 |
return new_df
|
21 |
|
22 |
# Function to create plots
|
23 |
-
|
24 |
-
if not selected_methods:
|
25 |
-
return plt.figure() # Return an empty plot if no method is selected
|
26 |
-
|
27 |
-
filtered_df = df[df['Method'].isin(selected_methods)]
|
28 |
-
fig, ax = plt.subplots()
|
29 |
-
for method in selected_methods:
|
30 |
-
method_df = filtered_df[filtered_df['Method'] == method]
|
31 |
-
ax.plot(method_df['PPL'], label=method) # Example: Plotting PPL, replace with your metrics
|
32 |
-
|
33 |
-
ax.set_xlabel('Index') # Modify as per your data
|
34 |
-
ax.set_ylabel('PPL') # Modify as per your data
|
35 |
-
ax.legend()
|
36 |
-
return fig
|
37 |
|
38 |
# Initialize Gradio app
|
39 |
demo = gr.Blocks()
|
|
|
20 |
return new_df
|
21 |
|
22 |
# Function to create plots
|
23 |
+
from plotter import create_plots
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Initialize Gradio app
|
26 |
demo = gr.Blocks()
|
app_bkp.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import pandas as pd
|
3 |
-
|
4 |
-
# Function to load data from a given CSV file
|
5 |
-
def load_data(version):
|
6 |
-
file_path = f'versions/{version}.csv' # Assuming filenames are version1.csv, version2.csv, version3.csv
|
7 |
-
return pd.read_csv(file_path)
|
8 |
-
|
9 |
-
# Function for searching in the leaderboard
|
10 |
-
def search_leaderboard(df, query):
|
11 |
-
if query == "":
|
12 |
-
return df
|
13 |
-
else:
|
14 |
-
return df[df['Method'].str.contains(query)]
|
15 |
-
|
16 |
-
# Function to change the version of the leaderboard
|
17 |
-
def change_version(version):
|
18 |
-
new_df = load_data(version)
|
19 |
-
return new_df
|
20 |
-
|
21 |
-
# Initialize Gradio app
|
22 |
-
demo = gr.Blocks()
|
23 |
-
|
24 |
-
with demo:
|
25 |
-
gr.Markdown("## 🥇 TOFU Leaderboard")
|
26 |
-
|
27 |
-
with gr.Row():
|
28 |
-
version_dropdown = gr.Dropdown(
|
29 |
-
choices=["llama", "phi", "stable-lm"],
|
30 |
-
label="π Select Base Model",
|
31 |
-
value="llama",
|
32 |
-
)
|
33 |
-
|
34 |
-
with gr.Row():
|
35 |
-
search_bar = gr.Textbox(
|
36 |
-
placeholder="Search for methods...",
|
37 |
-
show_label=False,
|
38 |
-
)
|
39 |
-
|
40 |
-
leaderboard_table = gr.components.Dataframe(
|
41 |
-
value=load_data("llama"), # Load initial version (version llama)
|
42 |
-
interactive=True,
|
43 |
-
visible=True,
|
44 |
-
)
|
45 |
-
|
46 |
-
version_dropdown.change(
|
47 |
-
change_version,
|
48 |
-
inputs=version_dropdown,
|
49 |
-
outputs=leaderboard_table
|
50 |
-
)
|
51 |
-
|
52 |
-
search_bar.change(
|
53 |
-
search_leaderboard,
|
54 |
-
inputs=[leaderboard_table, search_bar],
|
55 |
-
outputs=leaderboard_table
|
56 |
-
)
|
57 |
-
|
58 |
-
# Launch the app
|
59 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_old.py
DELETED
@@ -1,128 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
3 |
-
from src.assets.text_content import TITLE, INTRODUCTION_TEXT
|
4 |
-
from src.utils import get_data, compare_plots, filter_search
|
5 |
-
|
6 |
-
############################ For Leaderboards #############################
|
7 |
-
DATA_PATH = 'versions'
|
8 |
-
latest_flag = True  # Set flag to include latest data in Details and Versions tab
|
9 |
-
latest_df, latest_vname, previous_df, previous_vname = get_data(DATA_PATH, latest_flag)
|
10 |
-
|
11 |
-
global prev_df
|
12 |
-
prev_df = previous_df[0]
|
13 |
-
def select_prev_df(name):
|
14 |
-
ind = previous_vname.index(name)
|
15 |
-
prev_df = previous_df[ind]
|
16 |
-
return prev_df
|
17 |
-
|
18 |
-
############################ For Plots ####################################
|
19 |
-
global plot_df, MODEL_COLS
|
20 |
-
plot_df = latest_df[0]
|
21 |
-
MODEL_COLS = list(plot_df['Model'].unique())
|
22 |
-
|
23 |
-
|
24 |
-
############# MAIN APPLICATION ######################
|
25 |
-
demo = gr.Blocks()
|
26 |
-
with demo:
|
27 |
-
gr.HTML(TITLE)
|
28 |
-
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
29 |
-
|
30 |
-
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
31 |
-
with gr.TabItem("🥇 TOFU Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
32 |
-
with gr.Row():
|
33 |
-
search_bar = gr.Textbox(
|
34 |
-
placeholder=" π Search for models - separate multiple queries with `;` and press ENTER...",
|
35 |
-
show_label=False,
|
36 |
-
elem_id="search-bar",
|
37 |
-
)
|
38 |
-
|
39 |
-
leaderboard_table = gr.components.Dataframe(
|
40 |
-
value=latest_df[0],
|
41 |
-
elem_id="leaderboard-table",
|
42 |
-
interactive=False,
|
43 |
-
visible=True,
|
44 |
-
)
|
45 |
-
|
46 |
-
# Add a dummy leaderboard to handle search queries from the latest_df and not update latest_df
|
47 |
-
dummy_leaderboard_table = gr.components.Dataframe(
|
48 |
-
value=latest_df[0],
|
49 |
-
elem_id="leaderboard-table",
|
50 |
-
interactive=False,
|
51 |
-
visible=False,
|
52 |
-
)
|
53 |
-
|
54 |
-
search_bar.submit(
|
55 |
-
filter_search,
|
56 |
-
[dummy_leaderboard_table, search_bar],
|
57 |
-
leaderboard_table,
|
58 |
-
queue=True
|
59 |
-
)
|
60 |
-
with gr.TabItem("π Plot", id=3):
|
61 |
-
with gr.Row():
|
62 |
-
model_cols = gr.CheckboxGroup(
|
63 |
-
MODEL_COLS,
|
64 |
-
label="Select Models π€",
|
65 |
-
value=[],
|
66 |
-
elem_id="column-select",
|
67 |
-
interactive=True,
|
68 |
-
)
|
69 |
-
|
70 |
-
with gr.Row():
|
71 |
-
plot_grdf = gr.DataFrame(
|
72 |
-
value=plot_df,
|
73 |
-
visible=False
|
74 |
-
)
|
75 |
-
with gr.Row():
|
76 |
-
# Output block for the plot
|
77 |
-
plot_output = gr.Plot()
|
78 |
-
|
79 |
-
model_cols.change(
|
80 |
-
compare_plots,
|
81 |
-
[plot_grdf, model_cols],
|
82 |
-
plot_output,
|
83 |
-
queue=True
|
84 |
-
)
|
85 |
-
|
86 |
-
with gr.TabItem("π Versions and Details", elem_id="details", id=2):
|
87 |
-
with gr.Row():
|
88 |
-
ver_selection = gr.Dropdown(
|
89 |
-
previous_vname, label="Select Version 🕹️", value=previous_vname[0]
|
90 |
-
)
|
91 |
-
with gr.Row():
|
92 |
-
search_bar_prev = gr.Textbox(
|
93 |
-
placeholder=" π Search for models - separate multiple queries with `;` and press ENTER...",
|
94 |
-
show_label=False,
|
95 |
-
elem_id="search-bar-2",
|
96 |
-
)
|
97 |
-
|
98 |
-
prev_table = gr.components.Dataframe(
|
99 |
-
value=prev_df,
|
100 |
-
elem_id="leaderboard-table",
|
101 |
-
interactive=False,
|
102 |
-
visible=True,
|
103 |
-
)
|
104 |
-
|
105 |
-
dummy_prev_table = gr.components.Dataframe(
|
106 |
-
value=prev_df,
|
107 |
-
elem_id="leaderboard-table",
|
108 |
-
interactive=False,
|
109 |
-
visible=False,
|
110 |
-
)
|
111 |
-
|
112 |
-
search_bar_prev.submit(
|
113 |
-
filter_search,
|
114 |
-
[dummy_prev_table, search_bar_prev],
|
115 |
-
prev_table,
|
116 |
-
queue=True
|
117 |
-
)
|
118 |
-
|
119 |
-
ver_selection.change(
|
120 |
-
select_prev_df,
|
121 |
-
[ver_selection],
|
122 |
-
prev_table,
|
123 |
-
queue=True
|
124 |
-
)
|
125 |
-
|
126 |
-
demo.load()
|
127 |
-
demo.queue()
|
128 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plotter.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import seaborn as sns
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import scipy.stats as stats
|
6 |
+
|
7 |
+
import warnings
|
8 |
+
warnings.simplefilter("ignore", category=Warning)
|
9 |
+
|
10 |
+
def custom_agg(x):
    """Return the harmonic mean of the values in ``x``.

    ``create_plots`` applies this row-wise (``DataFrame.apply(..., axis=1)``)
    to collapse a row of metric values into a single score.

    Args:
        x: Array-like of numeric values (e.g. one row of a DataFrame).

    Returns:
        The harmonic mean as computed by ``scipy.stats.hmean``.
    """
    return stats.hmean(x)
|
13 |
+
|
14 |
+
def create_plots(big_df, selected_methods):
    """Plot per-metric bar charts and print MAPO tables for selected methods.

    For the rows of ``big_df`` whose ``Method`` is in ``selected_methods``:

    1. Every column that is not run metadata is treated as a metric.
    2. A ``MAPO`` column is added: the harmonic mean (``custom_agg``) of the
       metric columns, with ``ROUGE-P Forget`` replaced by ``1 - value`` for
       the aggregation only.
    3. For each (Method, Model, Forget Rate) group the row with the highest
       ``MAPO`` is kept.
    4. One bar chart per metric is saved under ``barplots/`` for the
       ``Llama-2-7B`` rows, and the matching ``\\includegraphics`` line is
       printed.
    5. A LaTeX table of ``MAPO`` by Method x Forget Rate is printed for
       ``Llama-2-7B`` and ``Phi``.

    Args:
        big_df: DataFrame with at least the columns ``Method``, ``Model``,
            ``Forget Rate``, ``LR``, ``Epoch``, ``Compute`` and
            ``ROUGE-P Forget``; all remaining columns are used as metrics.
            The caller's frame is not modified.
        selected_methods: Collection of ``Method`` values to keep.

    Returns:
        The matplotlib figure of the last metric plotted (``MAPO``), or an
        empty figure when nothing is selected / no row matches.
        NOTE(review): the in-app function this replaces returned a figure;
        confirm the caller in app.py feeds this into a plot output.

    Raises:
        ValueError: If a required metadata column is missing.
    """
    import os  # local import: only needed to create the output directory

    # Same contract as the previous in-app version: no selection -> empty plot.
    if not selected_methods:
        return plt.figure()

    # Copy the filtered rows so the column assignments below never write into
    # (a view of) the caller's DataFrame.
    big_df = big_df[big_df["Method"].isin(selected_methods)].copy()
    if big_df.empty:
        return plt.figure()

    metadata_cols = ("Method", "Model", "Forget Rate", "LR", "Epoch", "Compute")
    missing = [col for col in metadata_cols if col not in big_df.columns]
    if missing:
        raise ValueError(f"big_df is missing required columns: {missing}")

    metrics = [col for col in big_df.columns if col not in metadata_cols]
    print(metrics)

    # Aggregate on a scratch copy selected by column *name* (not by position),
    # using 1 - ROUGE-P on the forget set; the stored column keeps its
    # original value for the printed tables and the plots.
    scores = big_df[metrics].copy()
    scores["ROUGE-P Forget"] = 1 - scores["ROUGE-P Forget"]
    big_df["MAPO"] = scores.apply(custom_agg, axis=1)
    big_df["LR"] = big_df["LR"].astype(float)

    big_df = big_df.reset_index()
    print(
        big_df[["Method", "Model", "Forget Rate", "LR", "Epoch", "ROUGE-P Forget", "MAPO"]]
        .round(2)
        .to_markdown()
    )

    # Keep, per (Method, Model, Forget Rate), the run with the best MAPO.
    best_rows = big_df.groupby(["Method", "Model", "Forget Rate"])["MAPO"].idxmax()
    result = big_df.loc[best_rows]
    print(result[["Method", "Model", "Forget Rate", "LR", "Epoch", "MAPO"]].round(6).to_markdown())

    plot_legend = False
    fs = 18 if plot_legend else 22
    suffix = "legend" if plot_legend else ""
    metrics.append("MAPO")

    # Set the style of the visualization
    sns.set_theme(style="whitegrid")
    plt.rcParams["font.family"] = "Times New Roman"

    # savefig does not create missing directories.
    os.makedirs("barplots", exist_ok=True)

    # Loop-invariant; the mask must come from `result`, the frame it filters.
    llama_df = result[result["Model"] == "Llama-2-7B"]

    fig = None
    for metric_to_plot in metrics:
        fig, ax = plt.subplots(figsize=(15, 5))
        sns.barplot(
            x="Method",
            y=metric_to_plot,
            hue="Forget Rate",
            data=llama_df,
            ax=ax,
            legend=plot_legend,
        )
        ax.set_ylabel(metric_to_plot, fontsize=fs)
        ax.set_ylim(0.0, 1.0)
        ax.set_xlabel("", fontsize=fs)
        # Resize tick labels without freezing their text.
        ax.tick_params(axis="x", labelsize=fs)
        ax.tick_params(axis="y", labelsize=fs - 4)
        ax.spines[["right", "top"]].set_visible(False)
        if plot_legend:
            ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), title="Forget Rate (%)")
        ax.set_title(metric_to_plot + " on Llama-2-7B", fontsize=fs)
        fig.tight_layout()
        fig.savefig(f"barplots/{metric_to_plot}-Llama-2-7B{suffix}.pdf")
        print(
            f"\\includegraphics[width=\\textwidth]"
            f"{{figures/barplots/{metric_to_plot}-Llama-2-7B{suffix}.pdf}}"
        )
        plt.close(fig)

    for model in ["Llama-2-7B", "Phi"]:
        sub_df = result.loc[result["Model"] == model, ["Method", "Forget Rate", "MAPO"]]

        # Reorient: one row per Method, one column per Forget Rate.
        sub_df_reoriented = sub_df.pivot(index="Method", columns="Forget Rate", values="MAPO")

        # Output a LaTeX table of the MAPO values by Method and Forget Rate.
        print(sub_df_reoriented.round(4).to_latex(index=True))

    return fig
|