sudoping01 committed
Commit 1c9f0b6 · verified · 1 Parent(s): 5815dce

Update app.py

Make the model name clickable and round the metrics.

Files changed (1)
  1. app.py +43 -28
app.py CHANGED
@@ -28,13 +28,7 @@ else:
     leaderboard_df.to_csv(leaderboard_file, index=False)
 
 def normalize_text(text):
-    """
-    Normalize text for WER/CER calculation:
-    - Convert to lowercase
-    - Remove punctuation
-    - Replace multiple spaces with single space
-    - Strip leading/trailing spaces
-    """
+    """Normalize text for WER/CER calculation"""
     if not isinstance(text, str):
         text = str(text)
 
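Note: the shortened docstring drops the list of normalization steps (lowercase, remove punctuation, collapse spaces, strip ends). For reference, that behavior typically looks like the sketch below; this is illustrative only, since the actual body of normalize_text is not shown in this diff.

```python
import re
import string

def normalize_text_sketch(text):
    """Lowercase, remove punctuation, collapse whitespace, strip ends."""
    if not isinstance(text, str):
        text = str(text)  # coerce non-strings, as app.py does
    text = text.lower()  # lowercase
    text = text.translate(str.maketrans("", "", string.punctuation))  # drop punctuation
    return re.sub(r"\s+", " ", text).strip()  # collapse and trim whitespace
```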
@@ -92,21 +86,42 @@ def calculate_metrics(predictions_df):
     avg_wer = sum(item["wer"] for item in results) / len(results)
     avg_cer = sum(item["cer"] for item in results) / len(results)
 
+    # Calculate weighted average metrics based on reference length
     weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
     weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
 
     return avg_wer, avg_cer, weighted_wer, weighted_cer, results
 
-def add_ranking_numbers(df, sort_by="Combined_Score"):
-    """Add ranking numbers to the dataframe based on the sort column"""
+def format_as_percentage(value):
+    """Convert decimal to percentage with 2 decimal places"""
+    return f"{value * 100:.2f}%"
+
+def make_clickable_model(model_name):
+    """Format model name as clickable link to Hugging Face hub"""
+    link = f"https://huggingface.co/{model_name}"
+    return f'<a href="{link}" target="_blank" style="text-decoration: underline;">{model_name}</a>'
+
+def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
+    """Format leaderboard for display with ranking and percentages"""
     if len(df) == 0:
-        return pd.DataFrame(columns=["Rank"] + list(df.columns))
+        return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
 
-    sorted_df = df.sort_values(sort_by)
-    sorted_df.insert(0, "Rank", range(1, len(sorted_df) + 1))
+    display_df = df.copy()
+
+    display_df = display_df.sort_values(sort_by)
+
+    display_df.insert(0, "Rank", range(1, len(display_df) + 1))
 
-    return sorted_df
+    for col in ["WER", "CER", "Combined_Score"]:
+        if col in display_df.columns:
+            display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
+            display_df = display_df.drop(col, axis=1)
+
+    if "Model_Name" in display_df.columns:
+        display_df["Model_Name"] = display_df["Model_Name"].apply(make_clickable_model)
+
+    return display_df
 
 def update_ranking(method):
     """Update leaderboard ranking based on selected method"""
@@ -122,10 +137,10 @@ def update_ranking(method):
         elif method == "CER Only":
             sort_column = "CER"
 
-        return add_ranking_numbers(current_lb, sort_column)
+        return prepare_leaderboard_for_display(current_lb, sort_column)
 
     except Exception:
-        return pd.DataFrame(columns=["Rank", "Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+        return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
 def process_submission(model_name, csv_file):
     try:
@@ -163,6 +178,7 @@ def process_submission(model_name, csv_file):
         leaderboard = pd.read_csv(leaderboard_file)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
+        # Calculate combined score (70% WER, 30% CER)
         combined_score = avg_wer * 0.7 + avg_cer * 0.3
 
         new_entry = pd.DataFrame(
@@ -170,13 +186,13 @@ def process_submission(model_name, csv_file):
             columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
         )
 
-
+
         updated_leaderboard = pd.concat([leaderboard, new_entry]).sort_values("Combined_Score")
         updated_leaderboard.to_csv(leaderboard_file, index=False)
 
-        ranked_leaderboard = add_ranking_numbers(updated_leaderboard)
+        display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
 
-        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined Score: {combined_score:.4f}", ranked_leaderboard
+        return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
 
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
@@ -184,7 +200,7 @@ def process_submission(model_name, csv_file):
 with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
         """
-        # Bambara ASR Leaderboard
+        # 🇲🇱 Bambara ASR Leaderboard
 
         This leaderboard ranks and evaluates speech recognition models for the Bambara language.
         Models are ranked based on a combined score of WER and CER metrics.
@@ -199,10 +215,9 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
         if "Combined_Score" not in current_leaderboard.columns:
             current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
 
-
-        current_leaderboard = add_ranking_numbers(current_leaderboard.sort_values("Combined_Score"))
+        display_leaderboard = prepare_leaderboard_for_display(current_leaderboard)
     except Exception:
-        current_leaderboard = pd.DataFrame(columns=["Rank", "Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+        display_leaderboard = pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
     gr.Markdown("### Current ASR Model Rankings")
 
@@ -213,7 +228,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     )
 
     leaderboard_view = gr.DataFrame(
-        value=current_leaderboard,
+        value=display_leaderboard,
         interactive=False,
         label="Models are ranked by selected metric - lower is better"
     )
@@ -227,9 +242,9 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
         """
         ## Metrics Explanation
-        - **WER**: Word Error Rate (lower is better) - measures word-level accuracy
-        - **CER**: Character Error Rate (lower is better) - measures character-level accuracy
-        - **Combined Score**: Weighted average of WER (70%) and CER (30%) - provides a balanced evaluation
+        - **WER (%)**: Word Error Rate (lower is better) - measures word-level accuracy
+        - **CER (%)**: Character Error Rate (lower is better) - measures character-level accuracy
+        - **Combined Score (%)**: Weighted average of WER (70%) and CER (30%) - provides a balanced evaluation
         """
     )
 
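Worked example with illustrative numbers: a model with WER 40.00% and CER 15.00% gets a combined score of 0.40 * 0.7 + 0.15 * 0.3 = 0.28 + 0.045 = 0.325, displayed as 32.50 in the Combined_Score (%) column.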
@@ -251,7 +266,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     output_msg = gr.Textbox(label="Status", interactive=False)
     leaderboard_display = gr.DataFrame(
         label="Updated Leaderboard",
-        value=current_leaderboard,
+        value=display_leaderboard,
         interactive=False
     )
 
@@ -262,4 +277,4 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()