Make different aggs run faster (#5)
Browse files
- Make different aggs run faster (d59694170093223a311cd5b6c35dd522ed75d5bd)
app.py
CHANGED
@@ -13,7 +13,7 @@ Evaluation of H4 and community models across a diverse range of benchmarks from
|
|
13 |
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
|
14 |
|
15 |
|
16 |
-
def get_leaderboard_df(
|
17 |
filepaths = list(Path("eval_results").rglob("*.json"))
|
18 |
|
19 |
# Parse filepaths to get unique models
|
@@ -128,6 +128,12 @@ def get_leaderboard_df(agg: str = "max"):
|
|
128 |
# Strip off date from model name
|
129 |
df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
# Drop date and aggregate results by model name
|
132 |
df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
|
133 |
|
@@ -136,20 +142,12 @@ def get_leaderboard_df(agg: str = "max"):
|
|
136 |
# Convert all values to percentage
|
137 |
df[df.select_dtypes(include=["number"]).columns] *= 100.0
|
138 |
df = df.sort_values(by=["Average"], ascending=False)
|
139 |
-
|
140 |
return df
|
141 |
|
142 |
-
|
143 |
-
leaderboard_df = get_leaderboard_df()
|
144 |
-
|
145 |
-
|
146 |
-
def refresh(agg: str = "max"):
|
147 |
-
return get_leaderboard_df(agg=agg)
|
148 |
-
|
149 |
-
|
150 |
# Function to update the table based on search query
|
151 |
-
def filter_and_search(cols: list[str], search_query: str):
|
152 |
df = leaderboard_df
|
|
|
153 |
if len(search_query) > 0:
|
154 |
search_terms = search_query.split(";")
|
155 |
search_terms = [term.strip().lower() for term in search_terms]
|
@@ -189,19 +187,15 @@ with demo:
|
|
189 |
info="Select columns to display",
|
190 |
)
|
191 |
with gr.Group():
|
192 |
-
# leaderboard_df = get_leaderboard_df()
|
193 |
leaderboard_table = gr.Dataframe(
|
194 |
value=leaderboard_df,
|
195 |
wrap=True,
|
196 |
height=1000,
|
197 |
column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
|
198 |
)
|
199 |
-
with gr.Row():
|
200 |
-
refresh_button = gr.Button("Refresh")
|
201 |
|
202 |
-
cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
|
203 |
-
agg.change(
|
204 |
-
search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
|
205 |
-
refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
|
206 |
|
207 |
demo.launch()
|
|
|
13 |
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
|
14 |
|
15 |
|
16 |
+
def get_leaderboard_df():
|
17 |
filepaths = list(Path("eval_results").rglob("*.json"))
|
18 |
|
19 |
# Parse filepaths to get unique models
|
|
|
128 |
# Strip off date from model name
|
129 |
df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
|
130 |
|
131 |
+
return df
|
132 |
+
|
133 |
+
leaderboard_df = get_leaderboard_df()
|
134 |
+
|
135 |
+
def agg_df(df, agg: str = "max"):
|
136 |
+
df = df.copy()
|
137 |
# Drop date and aggregate results by model name
|
138 |
df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
|
139 |
|
|
|
142 |
# Convert all values to percentage
|
143 |
df[df.select_dtypes(include=["number"]).columns] *= 100.0
|
144 |
df = df.sort_values(by=["Average"], ascending=False)
|
|
|
145 |
return df
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
# Function to update the table based on search query
|
148 |
+
def filter_and_search(cols: list[str], search_query: str, agg: str):
|
149 |
df = leaderboard_df
|
150 |
+
df = agg_df(df, agg)
|
151 |
if len(search_query) > 0:
|
152 |
search_terms = search_query.split(";")
|
153 |
search_terms = [term.strip().lower() for term in search_terms]
|
|
|
187 |
info="Select columns to display",
|
188 |
)
|
189 |
with gr.Group():
|
|
|
190 |
leaderboard_table = gr.Dataframe(
|
191 |
value=leaderboard_df,
|
192 |
wrap=True,
|
193 |
height=1000,
|
194 |
column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
|
195 |
)
|
|
|
|
|
196 |
|
197 |
+
cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
198 |
+
agg.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
199 |
+
search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
|
|
200 |
|
201 |
demo.launch()
|