add option to exclude environment from mean
app.py (changed)
@@ -28,6 +28,15 @@ def filter_dfs(tags, lb):
     lb = lb[lb["Tags"].apply(lambda x: any(tag in x for tag in tags))]
     return lb
 
+def change_mean(env, lb):
+    global f_b_df, f_a_df
+    lb = f_a_df.copy()
+    if env:
+        mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
+    else:
+        mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Model", "Tags"]]
+    lb["Mean"] = lb[mean_cols].mean(axis=1)
+    return lb
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
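For illustration, a minimal pandas sketch of what the new change_mean callback computes (the benchmark column names here are made up; only Mean, Environment, Model and Tags come from the diff): the Mean column becomes the row-wise average of the score columns, with Environment dropped from that average when the checkbox is ticked.

import pandas as pd

# Hypothetical leaderboard; "General" and "Intelligibility" are stand-in score columns.
lb = pd.DataFrame({
    "Model": ["tts-a", "tts-b"],
    "Tags": [["open"], ["closed"]],
    "General": [0.80, 0.70],
    "Intelligibility": [0.90, 0.60],
    "Environment": [0.10, 0.95],
})

def recompute_mean(exclude_environment, lb):
    # Same column filtering as change_mean above: drop the non-score columns,
    # and optionally drop Environment from the averaged set.
    skip = ["Mean", "Model", "Tags"] + (["Environment"] if exclude_environment else [])
    mean_cols = [col for col in lb.columns if str(col) not in skip]
    lb = lb.copy()
    lb["Mean"] = lb[mean_cols].mean(axis=1)
    return lb

print(recompute_mean(True, lb))   # Mean averages General and Intelligibility only
print(recompute_mean(False, lb))  # Mean also includes the Environment score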
@@ -129,7 +138,8 @@ agg_df = BenchmarkSuite.aggregate_df(results_df)
 agg_df = agg_df.pivot(index="dataset", columns="benchmark_category", values="score")
 agg_df.rename(columns={"OVERALL": "General"}, inplace=True)
 agg_df.columns = [x.capitalize() for x in agg_df.columns]
-agg_df["Mean"
+mean_cols = [col for col in agg_df.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
+agg_df["Mean"] = agg_df[mean_cols].mean(axis=1)
 # make sure mean is the first column
 agg_df = agg_df[["Mean"] + [col for col in agg_df.columns if col != "Mean"]]
 for col in agg_df.columns:
@@ -212,6 +222,9 @@ app = gr.Blocks(css=custom_css, title="TTS Benchmark Leaderboard")
 with app:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("TTSDB Scores", elem_id="llm-benchmark-tab-table", id=0):
+            with gr.Group():
+                env = gr.Checkbox(value=True, label="Exclude environment from mean.")
+                gr.Markdown("**Environment** measures how well the system can reproduce noise in the training data. This doesn't correlate with human judgements for 'naturalness'")
             tags = gr.Dropdown(
                 TAGS,
                 value=[],
@@ -221,6 +234,7 @@ with app:
             )
             leaderboard = init_leaderboard(f_a_df)
             tags.change(filter_dfs, [tags, leaderboard], [leaderboard])
+            env.change(change_mean, [env, leaderboard], [leaderboard])
         with gr.TabItem("Individual Benchmarks", elem_id="llm-benchmark-tab-table", id=1):
             tags = gr.Dropdown(
                 TAGS,
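And a self-contained sketch of how the new checkbox drives the table, mirroring the env.change(change_mean, [env, leaderboard], [leaderboard]) wiring in the last hunk. The data and the plain gr.Dataframe component are assumptions for the sake of a runnable example; the Space itself builds f_a_df from its benchmark results and renders it through init_leaderboard.

import gradio as gr
import pandas as pd

# Stand-in for the Space's aggregated leaderboard dataframe.
f_a_df = pd.DataFrame({
    "Model": ["tts-a", "tts-b"],
    "General": [0.80, 0.70],
    "Environment": [0.10, 0.95],
})

def change_mean(env, lb):
    # env=True means "exclude Environment from the mean", as in the diff.
    lb = f_a_df.copy()
    skip = ["Mean", "Model"] + (["Environment"] if env else [])
    mean_cols = [col for col in lb.columns if str(col) not in skip]
    lb["Mean"] = lb[mean_cols].mean(axis=1)
    return lb[["Model", "Mean"] + mean_cols]

with gr.Blocks() as demo:
    env = gr.Checkbox(value=True, label="Exclude environment from mean.")
    table = gr.Dataframe(value=change_mean(True, None))
    # Recompute the table whenever the checkbox toggles.
    env.change(change_mean, [env, table], [table])

if __name__ == "__main__":
    demo.launch()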