Steveeeeeeen HF staff committed on
Commit
2ba944a
·
verified ·
1 Parent(s): 2a1b287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -7
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import json
4
- from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS, EXPLANATION
5
  from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
6
  from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
7
  from datetime import datetime, timezone
@@ -48,6 +48,26 @@ column_names = {
48
  "Spanish_male": "Spanish male",
49
  "Vietnamese_female": "Vietnamese female",
50
  "Vietnamese_male": "Vietnamese male",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
 
53
  african_cols = ["Ghanain English female", "Kenyan English female", "Kenyan English male", "Nigerian English female", "Nigerian English male"]
@@ -57,13 +77,15 @@ latin_american_cols = ["Latin American female", "Latin American male"]
57
  british_cols = ["Irish English female", "Irish English male", "Scottish English male", "Southern British English male"]
58
  european_cols = ["Eastern European male", "European male", "French female", "Italian female", "Spanish female", "Spanish male", "Catalan female", "Bulgarian female", "Bulgarian male", "Lithuanian male", "Romanian female"]
59
  asian_cols = ["Chinese female", "Chinese male", "Indonesian female", "Vietnamese female", "Vietnamese male", "Indian English female", "Indian English male"]
60
- eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
61
 
62
- if not csv_results.exists():
63
- raise Exception(f"CSV file {csv_results} does not exist locally")
64
 
65
  # Get csv with data and parse columns
66
- original_df = pd.read_csv(csv_results)
 
 
67
 
68
  # Formats the columns
69
  def formatter(x):
@@ -79,8 +101,17 @@ for col in original_df.columns:
79
  else:
80
  original_df[col] = original_df[col].apply(formatter) # For numerical values
81
 
 
 
 
 
 
 
82
  original_df.rename(columns=column_names, inplace=True)
83
  original_df.sort_values(by='Average WER ⬇️', inplace=True)
 
 
 
84
  female_cols = [col for col in original_df.columns if 'female' == col.split(' ')[-1]]
85
  male_cols = [col for col in original_df.columns if 'male' == col.split(' ')[-1]]
86
 
@@ -150,13 +181,14 @@ TYPES = [c.type for c in fields(AutoEvalColumn)]
150
  with gr.Blocks(css=LEADERBOARD_CSS) as demo:
151
  # gr.HTML(BANNER, elem_id="banner")
152
  # Write a header with the title
153
- gr.Markdown("<h1>🤗 Testing Model Robustness on EdAcc</h1>", elem_classes="markdown-text")
154
 
155
 
156
  gr.Markdown(EXPLANATION, elem_classes="markdown-text")
157
 
158
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
159
- with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
 
160
  # Add column filter dropdown
161
  column_filter = gr.Dropdown(
162
  choices=["All", "Female", "Male", "African", "North American", "Caribbean", "Latin American", "British", "European", "Asian"] + [v for k,v in column_names.items() if k != "model"],
@@ -217,4 +249,37 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
217
  outputs=[leaderboard_table]
218
  )
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  demo.launch(ssr_mode=False)
 
1
  import gradio as gr
2
  import pandas as pd
3
  import json
4
+ from constants import LEADERBOARD_CSS, EXPLANATION, EXPLANATION_EDACC, EXPLANATION_AFRI
5
  from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
6
  from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
7
  from datetime import datetime, timezone
 
48
  "Spanish_male": "Spanish male",
49
  "Vietnamese_female": "Vietnamese female",
50
  "Vietnamese_male": "Vietnamese male",
51
+ "agatu_test": "Agatu",
52
+ "angas_test": "Angas",
53
+ "bajju_test": "Bajju",
54
+ "bini_test": "Bini",
55
+ "brass_test": "Brass",
56
+ "delta_test": "Delta",
57
+ "eggon_test": "Eggon",
58
+ "ekene_test": "Ekene",
59
+ "ekpeye_test": "Ekpeye",
60
+ "gbagyi_test": "Gbagyi",
61
+ "igarra_test": "Igarra",
62
+ "ijaw-nembe_test": "Ijaw-Nembe",
63
+ "ikulu_test": "Ikulu",
64
+ "jaba_test": "Jaba",
65
+ "jukun_test": "Jukun",
66
+ "khana_test": "Khana",
67
+ "mada_test": "Mada",
68
+ "mwaghavul_test": "Mwaghavul",
69
+ "ukwuani_test": "Ukwuani",
70
+ "yoruba-hausa_test": "Yoruba-Hausa",
71
  }
72
 
73
  african_cols = ["Ghanain English female", "Kenyan English female", "Kenyan English male", "Nigerian English female", "Nigerian English male"]
 
77
  british_cols = ["Irish English female", "Irish English male", "Scottish English male", "Southern British English male"]
78
  european_cols = ["Eastern European male", "European male", "French female", "Italian female", "Spanish female", "Spanish male", "Catalan female", "Bulgarian female", "Bulgarian male", "Lithuanian male", "Romanian female"]
79
  asian_cols = ["Chinese female", "Chinese male", "Indonesian female", "Vietnamese female", "Vietnamese male", "Indian English female", "Indian English male"]
80
+ eval_queue_repo_edacc, requested_models, csv_results_edacc, csv_results_afrispeech = load_all_info_from_dataset_hub()
81
 
82
+ if not csv_results_edacc.exists():
83
+ raise Exception(f"CSV file {csv_results_edacc} does not exist locally")
84
 
85
  # Get csv with data and parse columns
86
+ original_df = pd.read_csv(csv_results_edacc)
87
+
88
+ afrispeech_df = pd.read_csv(csv_results_afrispeech)
89
 
90
  # Formats the columns
91
  def formatter(x):
 
101
  else:
102
  original_df[col] = original_df[col].apply(formatter) # For numerical values
103
 
104
+ for col in afrispeech_df.columns:
105
+ if col == "model":
106
+ afrispeech_df[col] = afrispeech_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
107
+ else:
108
+ afrispeech_df[col] = afrispeech_df[col].apply(formatter) # For numerical values
109
+
110
  original_df.rename(columns=column_names, inplace=True)
111
  original_df.sort_values(by='Average WER ⬇️', inplace=True)
112
+ afrispeech_df.rename(columns=column_names, inplace=True)
113
+ afrispeech_df.sort_values(by='Average WER ⬇️', inplace=True)
114
+
115
  female_cols = [col for col in original_df.columns if 'female' == col.split(' ')[-1]]
116
  male_cols = [col for col in original_df.columns if 'male' == col.split(' ')[-1]]
117
 
 
181
  with gr.Blocks(css=LEADERBOARD_CSS) as demo:
182
  # gr.HTML(BANNER, elem_id="banner")
183
  # Write a header with the title
184
+ gr.Markdown("<h1>🤫 How Biased is Whisper?</h1>", elem_classes="markdown-text")
185
 
186
 
187
  gr.Markdown(EXPLANATION, elem_classes="markdown-text")
188
 
189
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
190
+ with gr.TabItem("🏅 Edacc Results", elem_id="od-benchmark-tab-table", id=0):
191
+ gr.Markdown(EXPLANATION_EDACC, elem_classes="markdown-text")
192
  # Add column filter dropdown
193
  column_filter = gr.Dropdown(
194
  choices=["All", "Female", "Male", "African", "North American", "Caribbean", "Latin American", "British", "European", "Asian"] + [v for k,v in column_names.items() if k != "model"],
 
249
  outputs=[leaderboard_table]
250
  )
251
 
252
+ with gr.TabItem("🏅 Afrispeech Results", elem_id="od-benchmark-tab-table", id=1):
253
+ gr.Markdown(EXPLANATION_AFRI, elem_classes="markdown-text")
254
+ # Add column filter dropdown
255
+ afrispeech_column_filter = gr.Dropdown(
256
+ choices=["All"] + [v for k,v in column_names.items() if k != "model" and v in afrispeech_df.columns],
257
+ label="Filter by column",
258
+ multiselect=True,
259
+ value=["All"],
260
+ elem_id="afrispeech-column-filter"
261
+ )
262
+
263
+ leaderboard_table = gr.components.Dataframe(
264
+ value=afrispeech_df,
265
+ datatype=TYPES,
266
+ elem_id="leaderboard-table",
267
+ interactive=False,
268
+ visible=True,
269
+ )
270
+
271
+ # Update table columns when filter changes
272
+ def update_afrispeech_table(cols):
273
+ if "All" in cols:
274
+ return gr.Dataframe(value=afrispeech_df)
275
+
276
+ selected_cols = ["Model"] + cols # Always include the Model column
277
+ return gr.Dataframe(value=afrispeech_df[selected_cols])
278
+
279
+ afrispeech_column_filter.change(
280
+ fn=update_afrispeech_table,
281
+ inputs=[afrispeech_column_filter],
282
+ outputs=[leaderboard_table]
283
+ )
284
+
285
  demo.launch(ssr_mode=False)