sudoping01 committed
Commit 5f3b2ed · verified · 1 Parent(s): 33f8987

Update app.py

Files changed (1):
  app.py +79 -11
app.py CHANGED
@@ -37,12 +37,36 @@ except Exception as e:
     references = {}
     print("WARNING: Using empty references dictionary due to dataset loading error")

-# Initialize leaderboard file
+# Initialize leaderboard file with combined score
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
-    pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
+    # Create empty leaderboard with necessary columns
+    pd.DataFrame(columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
+    print("Created new leaderboard file")
+
+    # Add example entries so first-time visitors see something
+    example_data = [
+        ["Example Model 1", 0.35, 0.20, 0.305, "2023-01-01 00:00:00"],
+        ["Example Model 2", 0.40, 0.18, 0.334, "2023-01-02 00:00:00"],
+        ["Example Model 3", 0.32, 0.25, 0.299, "2023-01-03 00:00:00"]
+    ]
+    example_df = pd.DataFrame(
+        example_data,
+        columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]
+    )
+    example_df.to_csv(leaderboard_file, index=False)
+    print("Added example data to empty leaderboard for demonstration")
 else:
-    print(f"Loaded existing leaderboard with {len(pd.read_csv(leaderboard_file))} entries")
+    # Load existing leaderboard
+    leaderboard_df = pd.read_csv(leaderboard_file)
+
+    # Add Combined_Score column if it doesn't exist
+    if "Combined_Score" not in leaderboard_df.columns:
+        leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
+        leaderboard_df.to_csv(leaderboard_file, index=False)
+        print("Added Combined_Score column to existing leaderboard")
+
+    print(f"Loaded existing leaderboard with {len(leaderboard_df)} entries")

 def normalize_text(text):
     """
@@ -147,6 +171,21 @@ def calculate_metrics(predictions_df):
 
     return avg_wer, avg_cer, weighted_wer, weighted_cer, results
 
+def update_ranking(method):
+    """Update leaderboard ranking based on selected method"""
+    current_lb = pd.read_csv(leaderboard_file)
+
+    # Calculate combined score if not present
+    if "Combined_Score" not in current_lb.columns:
+        current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3
+
+    if method == "WER Only":
+        return current_lb.sort_values("WER")
+    elif method == "CER Only":
+        return current_lb.sort_values("CER")
+    else:  # Combined Score
+        return current_lb.sort_values("Combined_Score")
+
 def process_submission(submitter_name, csv_file):
     try:
         # Read and validate the uploaded CSV
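
`update_ranking` re-reads `leaderboard.csv` on every call and always sorts ascending, so the best (lowest) score sits first. A minimal sketch of the same dispatch logic on an in-memory frame (the sample rows here are made up):

```python
import pandas as pd

# Made-up stand-in for leaderboard.csv
lb = pd.DataFrame({"submitter": ["A", "B"], "WER": [0.40, 0.32], "CER": [0.18, 0.25]})
lb["Combined_Score"] = lb["WER"] * 0.7 + lb["CER"] * 0.3

# Same dispatch as update_ranking: pick a column, sort ascending
for method in ("WER Only", "CER Only", "Combined Score (WER 70%, CER 30%)"):
    col = {"WER Only": "WER", "CER Only": "CER"}.get(method, "Combined_Score")
    print(method, "->", lb.sort_values(col)["submitter"].tolist())
# B wins on WER and Combined_Score; A wins on CER
```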
@@ -194,14 +233,20 @@ def process_submission(submitter_name, csv_file):
         # Update the leaderboard
         leaderboard = pd.read_csv(leaderboard_file)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # Calculate combined score (70% WER, 30% CER)
+        combined_score = avg_wer * 0.7 + avg_cer * 0.3
+
         new_entry = pd.DataFrame(
-            [[submitter_name, avg_wer, avg_cer, timestamp]],
-            columns=["submitter", "WER", "CER", "timestamp"]
+            [[submitter_name, avg_wer, avg_cer, combined_score, timestamp]],
+            columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]
         )
 
-        leaderboard = pd.concat([leaderboard, new_entry]).sort_values("WER")
-        leaderboard.to_csv(leaderboard_file, index=False)
-
-        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}", leaderboard
+        # Add new entry to leaderboard
+        updated_leaderboard = pd.concat([leaderboard, new_entry]).sort_values("Combined_Score")
+        updated_leaderboard.to_csv(leaderboard_file, index=False)
+
+        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined Score: {combined_score:.4f}", updated_leaderboard
 
     except Exception as e:
         print(f"Error processing submission: {str(e)}")
@@ -214,7 +259,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     # Bambara ASR Leaderboard
 
     This leaderboard ranks and evaluates speech recognition models for the Bambara language.
-    Models are ranked based on their Word Error Rate (WER), from lowest to highest.
+    Models are ranked based on a combined score of WER and CER metrics.
     """
     )
 
@@ -222,13 +267,35 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("🏅 Current Rankings"):
             # Show current leaderboard rankings
-            current_leaderboard = pd.read_csv(leaderboard_file).sort_values("WER")
+            current_leaderboard = pd.read_csv(leaderboard_file)
+
+            # Calculate combined score if not present
+            if "Combined_Score" not in current_leaderboard.columns:
+                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
+
+            # Sort by combined score
+            current_leaderboard = current_leaderboard.sort_values("Combined_Score")
 
             gr.Markdown("### Current ASR Model Rankings")
+
+            # Add radio buttons for ranking method
+            ranking_method = gr.Radio(
+                ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
+                label="Ranking Method",
+                value="Combined Score (WER 70%, CER 30%)"
+            )
+
             leaderboard_view = gr.DataFrame(
                 value=current_leaderboard,
                 interactive=False,
-                label="Models are ranked by Word Error Rate (WER) - lower is better"
+                label="Models are ranked by selected metric - lower is better"
+            )
+
+            # Update leaderboard based on ranking method selection
+            ranking_method.change(
+                fn=update_ranking,
+                inputs=[ranking_method],
+                outputs=[leaderboard_view]
             )
 
             gr.Markdown(
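
The Radio-to-DataFrame event wiring, reduced to a self-contained Gradio sketch (same `.change` pattern as in the hunk above; the table contents are dummy data):

```python
import gradio as gr
import pandas as pd

df = pd.DataFrame({"submitter": ["A", "B"], "WER": [0.40, 0.32], "CER": [0.18, 0.25]})

def rerank(method):
    # Sort ascending by the column the radio selection maps to
    col = {"WER Only": "WER", "CER Only": "CER"}[method]
    return df.sort_values(col)

with gr.Blocks() as demo:
    method = gr.Radio(["WER Only", "CER Only"], value="WER Only", label="Ranking Method")
    view = gr.DataFrame(value=df.sort_values("WER"), interactive=False)
    # .change fires whenever the radio value changes and replaces the table contents
    method.change(fn=rerank, inputs=[method], outputs=[view])

demo.launch()
```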
@@ -236,6 +303,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     ## Metrics Explanation
     - **WER**: Word Error Rate (lower is better) - measures word-level accuracy
     - **CER**: Character Error Rate (lower is better) - measures character-level accuracy
+    - **Combined Score**: Weighted average of WER (70%) and CER (30%) - provides a balanced evaluation
     """
     )
 