vwxyzjn committed on
Commit
8cc933f
·
verified ·
1 Parent(s): 8ee0069

Make different aggs run faster (#5)

Browse files

- Make different aggs run faster (d59694170093223a311cd5b6c35dd522ed75d5bd)

Files changed (1) hide show
  1. app.py +12 -18
app.py CHANGED
@@ -13,7 +13,7 @@ Evaluation of H4 and community models across a diverse range of benchmarks from
13
  BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
14
 
15
 
16
- def get_leaderboard_df(agg: str = "max"):
17
  filepaths = list(Path("eval_results").rglob("*.json"))
18
 
19
  # Parse filepaths to get unique models
@@ -128,6 +128,12 @@ def get_leaderboard_df(agg: str = "max"):
128
  # Strip off date from model name
129
  df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
130
 
 
 
 
 
 
 
131
  # Drop date and aggregate results by model name
132
  df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
133
 
@@ -136,20 +142,12 @@ def get_leaderboard_df(agg: str = "max"):
136
  # Convert all values to percentage
137
  df[df.select_dtypes(include=["number"]).columns] *= 100.0
138
  df = df.sort_values(by=["Average"], ascending=False)
139
-
140
  return df
141
 
142
-
143
- leaderboard_df = get_leaderboard_df()
144
-
145
-
146
- def refresh(agg: str = "max"):
147
- return get_leaderboard_df(agg=agg)
148
-
149
-
150
  # Function to update the table based on search query
151
- def filter_and_search(cols: list[str], search_query: str):
152
  df = leaderboard_df
 
153
  if len(search_query) > 0:
154
  search_terms = search_query.split(";")
155
  search_terms = [term.strip().lower() for term in search_terms]
@@ -189,19 +187,15 @@ with demo:
189
  info="Select columns to display",
190
  )
191
  with gr.Group():
192
- # leaderboard_df = get_leaderboard_df()
193
  leaderboard_table = gr.Dataframe(
194
  value=leaderboard_df,
195
  wrap=True,
196
  height=1000,
197
  column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
198
  )
199
- with gr.Row():
200
- refresh_button = gr.Button("Refresh")
201
 
202
- cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
203
- agg.change(refresh, inputs=[agg], outputs=[leaderboard_table])
204
- search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
205
- refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
206
 
207
  demo.launch()
 
13
  BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
14
 
15
 
16
+ def get_leaderboard_df():
17
  filepaths = list(Path("eval_results").rglob("*.json"))
18
 
19
  # Parse filepaths to get unique models
 
128
  # Strip off date from model name
129
  df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
130
 
131
+ return df
132
+
133
+ leaderboard_df = get_leaderboard_df()
134
+
135
+ def agg_df(df, agg: str = "max"):
136
+ df = df.copy()
137
  # Drop date and aggregate results by model name
138
  df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
139
 
 
142
  # Convert all values to percentage
143
  df[df.select_dtypes(include=["number"]).columns] *= 100.0
144
  df = df.sort_values(by=["Average"], ascending=False)
 
145
  return df
146
 
 
 
 
 
 
 
 
 
147
  # Function to update the table based on search query
148
+ def filter_and_search(cols: list[str], search_query: str, agg: str):
149
  df = leaderboard_df
150
+ df = agg_df(df, agg)
151
  if len(search_query) > 0:
152
  search_terms = search_query.split(";")
153
  search_terms = [term.strip().lower() for term in search_terms]
 
187
  info="Select columns to display",
188
  )
189
  with gr.Group():
 
190
  leaderboard_table = gr.Dataframe(
191
  value=leaderboard_df,
192
  wrap=True,
193
  height=1000,
194
  column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
195
  )
 
 
196
 
197
+ cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
198
+ agg.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
199
+ search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
 
200
 
201
  demo.launch()