sudoping01 committed
Commit f23d956 · verified · 1 Parent(s): fdebe26

Update app.py

Files changed (1):
  1. app.py +87 -33
app.py CHANGED
@@ -13,20 +13,51 @@ token = os.environ.get("HG_TOKEN")
 login(token)
 
 print("Loading dataset...")
-dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
-references = {row["id"]: row["text"] for row in dataset}
+try:
+    dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
+    references = {row["id"]: row["text"] for row in dataset}
+    print(f"Successfully loaded dataset with {len(references)} samples")
+except Exception as e:
+    print(f"Error loading dataset: {str(e)}")
+    references = {}
+    print("WARNING: Using empty references dictionary due to dataset loading error")
 
+# Initialize leaderboard file with consistent column names
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
+    # Create with Model_Name consistently
     pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
-
+    print("Created new leaderboard file")
+
+    # Add example entries for first-time visitors
+    example_data = [
+        ["Example Model 1", 0.35, 0.20, 0.305, "2023-01-01 00:00:00"],
+        ["Example Model 2", 0.40, 0.18, 0.334, "2023-01-02 00:00:00"],
+        ["Example Model 3", 0.32, 0.25, 0.299, "2023-01-03 00:00:00"]
+    ]
+    example_df = pd.DataFrame(
+        example_data,
+        columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
+    )
+    example_df.to_csv(leaderboard_file, index=False)
+    print("Added example data to empty leaderboard for demonstration")
 else:
-
+    # Load existing leaderboard
     leaderboard_df = pd.read_csv(leaderboard_file)
 
+    # Rename column if needed for consistency
+    if "submitter" in leaderboard_df.columns and "Model_Name" not in leaderboard_df.columns:
+        leaderboard_df = leaderboard_df.rename(columns={"submitter": "Model_Name"})
+        leaderboard_df.to_csv(leaderboard_file, index=False)
+        print("Renamed 'submitter' column to 'Model_Name' for consistency")
+
+    # Add Combined_Score column if it doesn't exist
     if "Combined_Score" not in leaderboard_df.columns:
         leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3  # WER 70% and CER 30%
         leaderboard_df.to_csv(leaderboard_file, index=False)
+        print("Added Combined_Score column to existing leaderboard")
+
+    print(f"Loaded existing leaderboard with {len(leaderboard_df)} entries")
 
 def normalize_text(text):
     """
@@ -106,24 +137,28 @@ def calculate_metrics(predictions_df):
     return avg_wer, avg_cer, weighted_wer, weighted_cer, results
 
 def update_ranking(method):
-
-    current_lb = pd.read_csv(leaderboard_file)
-
-    if "Combined_Score" not in current_lb.columns:
-        current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3  # 70% for WER
-
-    if method == "WER Only":
-        return current_lb.sort_values("WER")
-    elif method == "CER Only":
-        return current_lb.sort_values("CER")
-    else:  # Combined Score
-        return current_lb.sort_values("Combined_Score")
-
-def process_submission(submitter_name, csv_file):
+    """Update leaderboard ranking based on selected method"""
     try:
+        current_lb = pd.read_csv(leaderboard_file)
+
+        if "Combined_Score" not in current_lb.columns:
+            current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3  # 70% for WER
+
+        if method == "WER Only":
+            return current_lb.sort_values("WER")
+        elif method == "CER Only":
+            return current_lb.sort_values("CER")
+        else:  # Combined Score
+            return current_lb.sort_values("Combined_Score")
+    except Exception as e:
+        print(f"Error updating ranking: {str(e)}")
+        # Return empty dataframe if something goes wrong
+        return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
 
+def process_submission(model_name, csv_file):
+    try:
         df = pd.read_csv(csv_file)
-        print(f"Processing submission from {submitter_name} with {len(df)} rows")
+        print(f"Processing submission from {model_name} with {len(df)} rows")
 
         if len(df) == 0:
             return "Error: Uploaded CSV is empty.", None
@@ -149,8 +184,6 @@ def process_submission(submitter_name, csv_file):
         try:
             avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
 
-
-
             # suspiciously low values
             if avg_wer < 0.001:
                 return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
@@ -165,8 +198,8 @@
             combined_score = avg_wer * 0.7 + avg_cer * 0.3
 
             new_entry = pd.DataFrame(
-                [[submitter_name, avg_wer, avg_cer, combined_score, timestamp]],
-                columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]
+                [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
+                columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
             )
 
             updated_leaderboard = pd.concat([leaderboard, new_entry]).sort_values("Combined_Score")
@@ -177,6 +210,22 @@
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
 
+# Make sure we have at least some data for first-time visitors
+if os.path.exists(leaderboard_file):
+    leaderboard_df = pd.read_csv(leaderboard_file)
+    if len(leaderboard_df) == 0:
+        # Add example entries if leaderboard is empty
+        example_data = [
+            ["Example Model 1", 0.35, 0.20, 0.305, "2023-01-01 00:00:00"],
+            ["Example Model 2", 0.40, 0.18, 0.334, "2023-01-02 00:00:00"],
+            ["Example Model 3", 0.32, 0.25, 0.299, "2023-01-03 00:00:00"]
+        ]
+        example_df = pd.DataFrame(
+            example_data,
+            columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
+        )
+        example_df.to_csv(leaderboard_file, index=False)
+        print("Added example data to empty leaderboard for demonstration")
 
 with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
@@ -190,12 +239,19 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
 
     with gr.Tabs() as tabs:
         with gr.TabItem("🏅 Current Rankings"):
-            current_leaderboard = pd.read_csv(leaderboard_file)
-
-            if "Combined_Score" not in current_leaderboard.columns:
-                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
-
-            current_leaderboard = current_leaderboard.sort_values("Combined_Score")
+            try:
+                # Load and make sure we have current leaderboard data
+                current_leaderboard = pd.read_csv(leaderboard_file)
+
+                if "Combined_Score" not in current_leaderboard.columns:
+                    current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
+
+                # Sort by combined score
+                current_leaderboard = current_leaderboard.sort_values("Combined_Score")
+            except Exception as e:
+                print(f"Error loading leaderboard: {str(e)}")
+                # Create empty dataframe if we can't load the file
+                current_leaderboard = pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
 
             gr.Markdown("### Current ASR Model Rankings")
 
@@ -237,7 +293,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
             )
 
             with gr.Row():
-                submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
+                model_name_input = gr.Textbox(label="Model Name", placeholder="e.g., MALIBA-AI/asr")
                 csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
 
             submit_btn = gr.Button("Submit")
@@ -250,13 +306,11 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
 
             submit_btn.click(
                 fn=process_submission,
-                inputs=[submitter, csv_upload],
+                inputs=[model_name_input, csv_upload],
                 outputs=[output_msg, leaderboard_display]
             )
 
-
 print("Starting Bambara ASR Leaderboard app...")
 
-
 if __name__ == "__main__":
     demo.launch(share=True)
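Note on the scoring rule this commit keeps in place: every ranking path weights word error rate at 70% and character error rate at 30% (Combined_Score = WER * 0.7 + CER * 0.3). Below is a minimal sketch of how per-utterance scores could be aggregated into that combined score locally. It assumes jiwer-style WER/CER values; calculate_metrics in app.py is not shown in this diff, so the jiwer calls and the sample Bambara strings are illustrative assumptions, not the app's exact implementation.

# Sketch only (not from app.py): assumes jiwer-style WER/CER values.
import pandas as pd
from jiwer import wer, cer

# Hypothetical reference transcripts and model outputs, keyed by utterance id.
references = {"utt1": "i ni ce", "utt2": "a taara sugu la"}
predictions = {"utt1": "i ni ce", "utt2": "a tara sugu la"}

rows = []
for utt_id, ref in references.items():
    hyp = predictions[utt_id]
    rows.append({"id": utt_id, "wer": wer(ref, hyp), "cer": cer(ref, hyp)})

scores = pd.DataFrame(rows)
avg_wer = scores["wer"].mean()
avg_cer = scores["cer"].mean()
combined_score = avg_wer * 0.7 + avg_cer * 0.3  # same weighting the leaderboard uses
print(f"WER={avg_wer:.3f}  CER={avg_cer:.3f}  Combined_Score={combined_score:.3f}")

Because the weights sum to 1, the combined score stays on the same error scale as typical WER and CER values, and lower remains better.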
 
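For context on the submission path: process_submission(model_name, csv_file) reads the uploaded file with pd.read_csv, and the benchmark references are built as {row["id"]: row["text"]}. The diff does not show which columns the app validates, so the sketch below, which assembles a submission CSV with "id" and "text" columns from a hypothetical transcribe function, is an assumption about the expected format rather than a documented contract.

# Sketch only: the "id"/"text" column names are inferred, not confirmed by this diff.
import pandas as pd
from datasets import load_dataset

def transcribe(example):
    # Hypothetical stand-in for your ASR model; return the predicted transcript for this example.
    return "i ni ce"

# As in app.py, loading the benchmark may require a prior huggingface_hub login.
eval_set = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
rows = [{"id": example["id"], "text": transcribe(example)} for example in eval_set]
pd.DataFrame(rows).to_csv("submission.csv", index=False)  # upload this file via the Space's submission form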