pratyushmaini committed on
Commit
0d2e03d
·
1 Parent(s): a084b1e

new template test

Browse files
Files changed (4) hide show
  1. app.py +128 -0
  2. src/assets/text_content.py +12 -0
  3. src/utils.py +236 -0
  4. versions/v1.0.csv +9 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from src.assets.text_content import TITLE, INTRODUCTION_TEXT
4
+ from src.utils import get_data, compare_plots, filter_search
5
+
6
############################ For Leaderboards #############################
# Directory holding one CSV per leaderboard version
DATA_PATH = 'versions'
# Set flag to include latest data in Details and Versions Tab
latest_flag = True
latest_df, latest_vname, previous_df, previous_vname = get_data(DATA_PATH, latest_flag)

# Dataframe used as the initial value of the tables in the Versions and Details tab
prev_df = previous_df[0]
13
def select_prev_df(name):
    '''
    Look up the dataframe that belongs to a version name
    Args:
        name: Version name, must be an entry of previous_vname
    Returns:
        The dataframe stored in previous_df at the position of name in previous_vname
    '''
    return previous_df[previous_vname.index(name)]
17
+
18
############################ For Plots ####################################
# Dataframe behind the Plot tab and the model names offered in its checkbox group
plot_df = latest_df[0]
MODEL_COLS = list(plot_df['Model'].unique())
22
+
23
+
24
############# MAIN APPLICATION ######################
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🥇 TOFU Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )

            leaderboard_table = gr.components.Dataframe(
                value=latest_df[0],
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

            # Hidden copy of the full table: searches always filter this copy,
            # so a search never narrows down the result of a previous search
            dummy_leaderboard_table = gr.components.Dataframe(
                value=latest_df[0],
                elem_id="leaderboard-table",
                interactive=False,
                visible=False,
            )

            search_bar.submit(
                filter_search,
                [dummy_leaderboard_table, search_bar],
                leaderboard_table,
                queue=True
            )

        with gr.TabItem("📈 Plot", id=3):
            with gr.Row():
                model_cols = gr.CheckboxGroup(
                    MODEL_COLS,
                    label="Select Models 🤖",
                    value=[],
                    elem_id="column-select",
                    interactive=True,
                )

            with gr.Row():
                # Hidden dataframe that hands the plot data to compare_plots
                plot_grdf = gr.DataFrame(
                    value=plot_df,
                    visible=False
                )
            with gr.Row():
                # Output block for the plot
                plot_output = gr.Plot()

            model_cols.change(
                compare_plots,
                [plot_grdf, model_cols],
                plot_output,
                queue=True
            )

        with gr.TabItem("🔄 Versions and Details", elem_id="details", id=2):
            with gr.Row():
                ver_selection = gr.Dropdown(
                    previous_vname, label="Select Version 🕹️", value=previous_vname[0]
                )
            with gr.Row():
                search_bar_prev = gr.Textbox(
                    placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                    show_label=False,
                    elem_id="search-bar-2",
                )

            prev_table = gr.components.Dataframe(
                value=prev_df,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

            # Hidden, unfiltered copy of the selected version; source for searches
            dummy_prev_table = gr.components.Dataframe(
                value=prev_df,
                elem_id="leaderboard-table",
                interactive=False,
                visible=False,
            )

            search_bar_prev.submit(
                filter_search,
                [dummy_prev_table, search_bar_prev],
                prev_table,
                queue=True
            )

            def _select_version(name):
                # Return the chosen version twice: once for the visible table and
                # once for the hidden search source. Updating only the visible
                # table would leave later searches filtering the old version.
                selected = select_prev_df(name)
                return selected, selected

            ver_selection.change(
                _select_version,
                [ver_selection],
                [prev_table, dummy_prev_table],
                queue=True
            )

    demo.load()
demo.queue()
demo.launch()
src/assets/text_content.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# HTML heading rendered at the top of the app
TITLE = """<h1 align="center" id="space-title"> 🏆 TOFU Leaderboard</h1>"""

# Markdown shown below the heading
INTRODUCTION_TEXT = """
TOFU leaderboard description.
"""

# Labels used for the points in the plot, keyed by the full name in the
# leaderboard; names missing here are shortened heuristically by the plot code
SHORT_NAMES = {
    "KL": "KL",
    "Grad Ascent": "Grad Ascent",
    "Gradient Difference": "Grad Diff",
    "Oracle": "Oracle",
    # Methods that appear in versions/v1.0.csv
    "DPO": "DPO",
    "IDK": "IDK",
}
src/utils.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+
6
+ from src.assets.text_content import SHORT_NAMES
7
+
8
def update_cols(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Change three header rows to a single header row
    Remove this function if the dataframe has only one header row
    Args:
        df: Raw dataframe containing 3 separate header rows (the first one is
            the column index, the other two are the first two data rows)
    Returns:
        df: Renamed copy of the dataframe without its first two rows, so that
            only 1 header row is left instead of 3
    '''
    default_cols = list(df.columns)

    # The first 4 columns always get these fixed names
    update = ['Model', 'Clemscore', 'Played', 'Quality Score']

    # The remaining columns come in groups of three per game; the first column
    # of each group carries the game name used to label the whole group
    for game in default_cols[4::3]:
        game_name = str(game).capitalize()
        update.extend([
            game_name + "(Played)",
            game_name + "(Quality Score)",
            game_name + "(Quality Score[std])",
        ])

    # update is never shorter than default_cols, so zip pairs every old name
    # with its new one and drops surplus names of an incomplete last group
    map_cols = dict(zip(default_cols, update))

    df = df.rename(columns=map_cols)
    # The first two rows are the two extra header rows
    return df.iloc[2:]
40
+
41
def process_df(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Process dataframe - Remove repetition in model names, convert datatypes to sort by "float" instead of "str"
    Args:
        df: Unprocessed Dataframe (after using update_cols); the first column
            holds the model names, every other column must be convertible to float
    Returns:
        df: Processed copy of the Dataframe, the input is left untouched
    Raises:
        ValueError: If a column other than the first holds values that cannot be converted to float
    '''
    # Work on a copy so the caller's dataframe (often a slice of another frame) is not modified
    df = df.copy()

    # Change column type to float from str
    list_column_names = list(df.columns)
    model_col_name = list_column_names[0]
    for col in list_column_names:
        if col != model_col_name:
            df[col] = df[col].astype(float)

    # Remove repetition in model names, if any
    models_list = []
    for model_name in df[model_col_name]:
        splits = [split.replace('-t0.0', '') for split in model_name.split('--')]  # Comment to not remove -t0.0
        # A name without '--' has a single part and is kept as it is;
        # "a--a" collapses to "a", "a--b" stays a pair
        if len(splits) < 2 or splits[0] == splits[1]:
            models_list.append(splits[0])
        else:
            models_list.append(splits[0] + "--" + splits[1])
    df[model_col_name] = models_list

    return df
70
+
71
def get_data(path: str, flag: bool):
    '''
    Get a list of all version names and respective Dataframes
    Args:
        path: Path to the directory containing CSVs of different versions -> v0.9.csv, v1.0.csv, ....
        flag: Set this flag to include the latest version in Details and Versions tab
    Returns:
        None if the directory is empty or contains no CSV file, otherwise
        latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
        latest_vname: list of the name of latest version
        previous_df: list of dataframes for previous versions (can skip latest version if required)
        previous_vname: list of the names for the previous versions (INCLUDED IN Details and Versions Tab)
    '''
    import re  # only needed here, for the numeric version ordering

    # Check if Directory is empty
    list_versions = os.listdir(path)
    if not list_versions:
        print("Directory is empty")
        return None

    files = [file for file in list_versions if file.endswith('.csv')]
    if not files:
        # Without this check the code below would fail with an IndexError
        print("Directory contains no CSV files")
        return None

    def version_key(file_name):
        # Compare the numbers inside the name as integers so that v1.10 ranks
        # above v1.9; the name itself breaks ties
        stem = os.path.splitext(file_name)[0]
        return [int(number) for number in re.findall(r'\d+', stem)], stem

    # Latest version first
    files.sort(key=version_key, reverse=True)
    file_names = [os.path.splitext(file)[0] for file in files]

    DFS = []
    for file in files:
        df = pd.read_csv(os.path.join(path, file))
        df = update_cols(df)  # Remove if by default there is only one header row
        df = process_df(df)  # Process Dataframe
        df = df.sort_values(by=list(df.columns)[1], ascending=False)  # Sort by clemscore
        DFS.append(df)

    # Only keep relevant columns for the main leaderboard
    all_columns = list(DFS[0].columns)
    keep_columns = all_columns[0:4]
    latest_df_dummy = DFS[0].drop(columns=[c for c in all_columns if c not in keep_columns])

    latest_df = [latest_df_dummy]
    latest_vname = [file_names[0]]

    # Versions listed in the Details and Versions tab; the latest one is
    # skipped when the flag is not set
    first = 0 if flag else 1
    previous_df = DFS[first:]
    previous_vname = file_names[first:]

    return latest_df, latest_vname, previous_df, previous_vname
122
+
123
+
124
+ # ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
125
def compare_plots(df: pd.DataFrame, LIST: list):
    '''
    Quality Score v/s % Played plot by selecting models
    Args:
        df: Dataframe with the model names in the first column, the x values
            in the third column and the y values in the fourth column
        LIST: The list of models to show in the plot, updated from frontend
    Returns:
        fig: The plot
    '''
    short_names = label_map(LIST)

    list_columns = list(df.columns)
    model_col, x_col, y_col = list_columns[0], list_columns[2], list_columns[3]
    df = df[df[model_col].isin(LIST)]

    # Largest x value of the selection scales the colours; fall back to 1 for
    # an empty selection or a non-positive maximum to avoid max() of nothing
    # and a division by zero
    x_max = df[x_col].max() if len(df) else 0
    if not x_max > 0:
        x_max = 1

    fig, ax = plt.subplots()
    for model in LIST:
        model_df = df[df[model_col] == model]
        if model_df.empty:
            # Selected model is not part of the dataframe, nothing to draw
            continue
        short = short_names[model]
        x = model_df[x_col]
        y = model_df[y_col]
        color = plt.cm.rainbow((x / x_max).to_numpy())  # Use a colormap for different colors
        ax.scatter(x, y, color=color)
        # Label every point on its own, a model may have more than one row
        for x_val, y_val in zip(x, y):
            ax.annotate(f'{short}', (x_val, y_val), textcoords="offset points", xytext=(0, -15), ha='center', rotation=0)

    ax.grid(which='both', color='grey', linewidth=1, linestyle='-', alpha=0.2)
    ax.set_xticks(np.arange(0, 110, 10))
    ax.set_xlim(-10, 110)
    ax.set_ylim(-10, 110)
    ax.set_xlabel('% Played')
    ax.set_ylabel('Quality Score')
    ax.set_title('Overview of benchmark results')

    # No plt.show(): the figure is returned to the caller for rendering.
    # Closing removes it from pyplot's figure registry so repeated calls do not
    # pile up open figures; the returned Figure object can still be rendered.
    plt.close(fig)

    return fig
162
+
163
def shorten_model_name(full_name):
    '''
    Build a short plot label from a long model name
    Args:
        full_name: Long model name, parts separated by "-"
    Returns:
        short_name: First character of the name, followed by "-" and either
                    the first three characters (name without "-") or the parts
                    of the name that contain a digit (model sizes and versions);
                    an empty name gives an empty label
    '''
    if not full_name:
        # Nothing to shorten, and full_name[0] below would fail
        return ''

    # Split the name into parts
    parts = full_name.split('-')

    if len(parts) == 1:
        short_name = full_name[:3]
    else:
        # Keep only the parts with digits (model sizes and versions)
        short_name = '-'.join(part for part in parts if any(char.isdigit() for char in part))

    # Remove any leading or trailing hyphens from the final label; a name
    # without digit parts would otherwise end in a dangling "-"
    return (full_name[0] + '-' + short_name).strip('-')
180
+
181
def label_map(model_list: list) -> dict:
    '''
    Generate a map from long names to short names, to plot them in frontend graph
    Define the short names in src/assets/text_content.py
    Args:
        model_list: A list of long model names
    Returns:
        short_names: A map from each long name to its short name; the short
                     name is taken from SHORT_NAMES when defined there and
                     generated with shorten_model_name otherwise
    '''
    return {
        model_name: (
            SHORT_NAMES[model_name]
            if model_name in SHORT_NAMES
            else shorten_model_name(model_name)
        )
        for model_name in model_list
    }
208
+
209
def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    '''
    Filter the dataframe based on the search query
    Args:
        df: Unfiltered dataframe, model names are in the first column
        query: a string of queries separated by ";"
    Return:
        filtered_df: Dataframe containing the rows whose model name contains
                     at least one of the queries (case-insensitive); the
                     unfiltered dataframe if the query holds no search term
    '''
    if not query:
        return df

    # Ignore surrounding blanks ("a; b") and empty terms ("a;"): an empty term
    # is contained in every name and would match all rows
    queries = [q.strip().lower() for q in query.split(';')]
    queries = [q for q in queries if q]
    if not queries:
        return df

    model_col = list(df.columns)[0]
    filtered_models = [
        model_name
        for model_name in df[model_col]
        if any(q in str(model_name).lower() for q in queries)
    ]

    return df[df[model_col].isin(filtered_models)]
236
+
versions/v1.0.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Method,Compute,PPL,Truth,ROUGE,MAPO
2
+ DPO,Forget,0.0768753815825175,0.3986066518668836,0.9565206583678539,0.8712100761297452
3
+ DPO,Retain,0.5996286412275281,0.3602207419553394,0.6659869365178336,0.10860875787554469
4
+ Grad Ascent,Forget,0.01276005409910086,0.8739653533368998,0.023760115576687335,0.557119299008513
5
+ Grad Ascent,Retain,0.9153365174437643,0.5117768071328227,0.13019174875223205,0.2736769500895253
6
+ IDK,Forget,0.9575847571359651,0.14495165859171177,0.5215967278097287,0.9513180970650936
7
+ IDK,Retain,0.5081723023409522,0.7260250131902866,0.46407442478973215,0.9008803129332287
8
+ KL,Forget,0.23284021819861755,0.7973023013038227,0.9713336423092905,0.674807833567933
9
+ KL,Retain,0.04023188230471908,0.7071738714102987,0.4663170982373773,0.6658539062921722