pratyushmaini committed on
Commit
35416d7
·
1 Parent(s): cf8c271
Files changed (2) hide show
  1. src/assets/text_content.py +0 -12
  2. src/utils.py +0 -236
src/assets/text_content.py DELETED
@@ -1,12 +0,0 @@
1
# HTML heading for the leaderboard page.
TITLE = """<h1 align="center" id="space-title"> 🏆 TOFU Leaderboard</h1>"""

# Introductory description text for the leaderboard.
INTRODUCTION_TEXT = """
TOFU leaderboard description.
"""

# Maps a full method name to its short display label.
SHORT_NAMES = {
    "KL": "KL",
    "Grad Ascent": "Grad Ascent",
    "Gradient Difference": "Grad Diff",
    "Oracle": "Oracle",
}
 
 
 
 
 
 
 
 
 
 
 
 
 
src/utils.py DELETED
@@ -1,236 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import numpy as np
5
-
6
- from src.assets.text_content import SHORT_NAMES
7
-
8
def update_cols(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Change three header rows to a single header row.

    The first four columns get the fixed names 'Model', 'Clemscore', 'Played'
    and 'Quality Score'. The remaining columns are handled in groups of three:
    each group is named after the (capitalized) label of its first column,
    suffixed with "(Played)", "(Quality Score)" and "(Quality Score[std])".
    The first two rows of the frame are then dropped.
    Remove this function if the dataframe has only one header row.
    Args:
        df: Raw dataframe containing 3 separate header rows
    Returns:
        df: Updated dataframe which has only 1 header row instead of 3
    '''
    default_cols = list(df.columns)

    # The first 4 column names are fixed; per-game names are appended below.
    update = ['Model', 'Clemscore', 'Played', 'Quality Score']

    # Every third column past the first four carries the game's label.
    for game in default_cols[4::3]:
        game_name = str(game).capitalize()
        update.extend([
            game_name + "(Played)",
            game_name + "(Quality Score)",
            game_name + "(Quality Score[std])",
        ])

    # `update` is never shorter than `default_cols`, so zip covers every
    # original column; surplus generated names are simply ignored.
    map_cols = dict(zip(default_cols, update))

    # Rename, then drop the two leftover header rows.
    return df.rename(columns=map_cols).iloc[2:]
40
-
41
def process_df(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Process dataframe - Remove repetition in model names, convert datatypes
    to sort by "float" instead of "str".

    The first column is treated as the model-name column; every other column
    is cast to float. Model names of the form "a--b" have the '-t0.0' marker
    removed from each part and are collapsed to "a" when both parts match.
    Names without a "--" separator are kept (minus '-t0.0') instead of
    raising IndexError. The frame is modified in place and also returned.
    Args:
        df: Unprocessed Dataframe (after using update_cols)
    Returns:
        df: Processed Dataframe
    '''
    list_column_names = list(df.columns)
    if not list_column_names:
        # Nothing to convert or rename.
        return df
    model_col_name = list_column_names[0]

    # Change column type to float from str
    for col in list_column_names:
        if col != model_col_name:
            df[col] = df[col].astype(float)

    def _dedupe(model_name):
        # Remove '-t0.0' from every part (drop the replace to keep it).
        splits = [split.replace('-t0.0', '') for split in model_name.split('--')]
        # A single part has no pair to compare against.
        if len(splits) == 1 or splits[0] == splits[1]:
            return splits[0]
        return splits[0] + "--" + splits[1]

    # Remove repetition in model names, if any
    df[model_col_name] = [_dedupe(name) for name in df[model_col_name]]

    return df
70
-
71
def get_data(path: str, flag: bool):
    '''
    Get a list of all version names and respective Dataframes.

    CSV files in `path` are ordered by file name, descending; the first one
    is treated as the latest version. Each file is loaded, passed through
    update_cols and process_df, and sorted descending by its second column.
    Args:
        path: Path to the directory containing CSVs of different versions -> v0.9.csv, v1.0.csv, ....
        flag: Set this flag to include the latest version in Details and Versions tab
    Returns:
        None if the directory is empty or contains no CSV files, otherwise a
        tuple of:
        latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
        latest_vname: list of the name of latest version
        previous_df: list of dataframes for previous versions (can skip latest version if required)
        previous_vname: list of the names for the previous versions (INCLUDED IN Details and Versions Tab)
    '''
    # Check if Directory is empty
    list_versions = os.listdir(path)
    if not list_versions:
        print("Directory is empty")
        return None

    files = sorted(
        (file for file in list_versions if file.endswith('.csv')),
        reverse=True,
    )
    if not files:
        # Previously this case crashed with IndexError further down.
        print("No CSV files found in directory")
        return None
    file_names = [os.path.splitext(file)[0] for file in files]

    dfs = []
    for file in files:
        df = pd.read_csv(os.path.join(path, file))
        df = update_cols(df)  # Remove if by default there is only one header row
        df = process_df(df)  # Process Dataframe
        df = df.sort_values(by=list(df.columns)[1], ascending=False)  # Sort by clemscore
        dfs.append(df)

    # Only keep the first four columns for the main leaderboard
    keep_columns = list(dfs[0].columns)[0:4]
    latest_df = [dfs[0][keep_columns]]
    latest_vname = [file_names[0]]

    # With the flag unset, the latest version is left out of the "previous" lists.
    start = 0 if flag else 1
    previous_df = dfs[start:]
    previous_vname = file_names[start:]

    return latest_df, latest_vname, previous_df, previous_vname
122
-
123
-
124
- # ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
125
def compare_plots(df: pd.DataFrame, LIST: list):
    '''
    Quality Score v/s % Played plot by selecting models.

    The first column of `df` is used as the model name, the third as the
    x value (% Played) and the fourth as the y value (Quality Score). Each
    selected model is drawn as a scatter point, coloured by its x value
    relative to the largest selected x value, and annotated with its short
    name from label_map. Models in LIST that are absent from `df` are
    skipped. The figure is returned without calling plt.show(), leaving
    display to the caller.
    Args:
        df: Dataframe whose columns are laid out as described above
        LIST: The list of models to show in the plot, updated from frontend
    Returns:
        fig: The plot
    '''
    short_names = label_map(LIST)

    list_columns = list(df.columns)
    model_col = list_columns[0]
    played_col = list_columns[2]
    quality_col = list_columns[3]

    selected = df[df[model_col].isin(LIST)]

    # Colour scale denominator; fall back to 1 when nothing is selected or
    # the maximum is zero/NaN, so the division below can never fail.
    max_played = selected[played_col].max() if len(selected) else 0
    if not max_played > 0:
        max_played = 1

    fig, ax = plt.subplots()
    for model in LIST:
        short = short_names[model]
        model_rows = selected[selected[model_col] == model]
        # Plot row by row so scatter/annotate receive scalar coordinates.
        for x, y in zip(model_rows[played_col], model_rows[quality_col]):
            color = plt.cm.rainbow(x / max_played)  # Use a colormap for different colors
            ax.scatter(x, y, color=color)
            ax.annotate(f'{short}', (x, y), textcoords="offset points", xytext=(0, -15), ha='center', rotation=0)

    ax.grid(which='both', color='grey', linewidth=1, linestyle='-', alpha=0.2)
    ax.set_xticks(np.arange(0, 110, 10))
    ax.set_xlim(-10, 110)
    ax.set_ylim(-10, 110)
    ax.set_xlabel('% Played')
    ax.set_ylabel('Quality Score')
    ax.set_title('Overview of benchmark results')

    return fig
162
-
163
def shorten_model_name(full_name):
    '''
    Build a short label from a model name.

    A name without any hyphen is shortened to its first three characters.
    Otherwise the label is the first character of the name followed by the
    hyphen-separated parts that contain a digit (model sizes and versions),
    e.g. "llama-2-7b" -> "l-2-7b". If no part contains a digit, only the
    first character is returned, so the label never ends in a dangling hyphen.
    Args:
        full_name: The full model name
    Returns:
        short_name: The shortened label
    '''
    # Split the name into parts
    parts = full_name.split('-')

    if len(parts) == 1:
        return full_name[:3]

    # Keep only the parts with digits (model sizes and versions)
    digit_parts = [part for part in parts if any(char.isdigit() for char in part)]
    if not digit_parts:
        return full_name[0]

    return full_name[0] + '-' + '-'.join(digit_parts)
180
-
181
def label_map(model_list: list) -> dict:
    '''
    Generate a map from long names to short names, to plot them in frontend graph
    Names present in SHORT_NAMES use the label defined there; every other
    name is shortened with shorten_model_name.
    Args:
        model_list: A list of long model names
    Returns:
        short_names: A dict mapping each long model name to its short name
    '''
    return {
        long_name: (
            SHORT_NAMES[long_name]
            if long_name in SHORT_NAMES
            else shorten_model_name(long_name)
        )
        for long_name in model_list
    }
208
-
209
def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    '''
    Filter the dataframe based on the search query.

    The query is split on ";". Each term is stripped of surrounding
    whitespace and matched case-insensitively as a substring of the values
    in the first column. Empty terms (e.g. from a trailing ";") are ignored;
    a query with no usable terms returns the dataframe unchanged.
    Args:
        df: Unfiltered dataframe
        query: a string of queries separated by ";"
    Return:
        filtered_df: Dataframe containing searched queries in the 'Model' column
    '''
    terms = [term.strip().lower() for term in query.split(';')]
    terms = [term for term in terms if term]
    if not terms:
        return df

    model_col = list(df.columns)[0]
    # An explicit bool Series keeps the indexing a row mask even when df is empty.
    matches = pd.Series(
        [any(term in str(name).lower() for term in terms) for name in df[model_col]],
        index=df.index,
        dtype=bool,
    )

    return df[matches]
236
-