sudoping01 committed
Commit 5815dce · verified · 1 Parent(s): e8b48ca

Update app.py

Files changed (1): app.py (+26 -20)
app.py CHANGED
@@ -17,18 +17,12 @@ try:
 except Exception as e:
     references = {}

-
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
 else:
     leaderboard_df = pd.read_csv(leaderboard_file)

-
-    # if "submitter" in leaderboard_df.columns and "Model_Name" not in leaderboard_df.columns:
-    #     leaderboard_df = leaderboard_df.rename(columns={"submitter": "Model_Name"})
-    #     leaderboard_df.to_csv(leaderboard_file, index=False)
-
     if "Combined_Score" not in leaderboard_df.columns:
         leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
         leaderboard_df.to_csv(leaderboard_file, index=False)
@@ -95,16 +89,25 @@ def calculate_metrics(predictions_df):
     if not results:
         raise ValueError("No valid samples for WER/CER calculation")

-
     avg_wer = sum(item["wer"] for item in results) / len(results)
     avg_cer = sum(item["cer"] for item in results) / len(results)

-    # Calculate weighted average metrics based on reference length
     weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
     weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars

     return avg_wer, avg_cer, weighted_wer, weighted_cer, results

+def add_ranking_numbers(df, sort_by="Combined_Score"):
+    """Add ranking numbers to the dataframe based on the sort column"""
+    if len(df) == 0:
+        return pd.DataFrame(columns=["Rank"] + list(df.columns))
+
+
+    sorted_df = df.sort_values(sort_by)
+    sorted_df.insert(0, "Rank", range(1, len(sorted_df) + 1))
+
+    return sorted_df
+
 def update_ranking(method):
     """Update leaderboard ranking based on selected method"""
     try:
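Reviewer note: a minimal sketch of what the new add_ranking_numbers helper returns on a toy leaderboard. The helper body is copied from this diff; the model names and scores below are invented sample data, and lower scores rank first because sort_values sorts ascending.

import pandas as pd

def add_ranking_numbers(df, sort_by="Combined_Score"):
    """Add ranking numbers to the dataframe based on the sort column"""
    if len(df) == 0:
        return pd.DataFrame(columns=["Rank"] + list(df.columns))
    sorted_df = df.sort_values(sort_by)
    sorted_df.insert(0, "Rank", range(1, len(sorted_df) + 1))
    return sorted_df

# Invented sample rows; lower Combined_Score = better rank
lb = pd.DataFrame({
    "Model_Name": ["model-a", "model-b"],
    "WER": [0.42, 0.35],
    "CER": [0.20, 0.18],
})
lb["Combined_Score"] = lb["WER"] * 0.7 + lb["CER"] * 0.3
print(add_ranking_numbers(lb))
# (approximate output)
#    Rank Model_Name   WER   CER  Combined_Score
# 1     1    model-b  0.35  0.18           0.299
# 0     2    model-a  0.42  0.20           0.354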
@@ -113,14 +116,16 @@ def update_ranking(method):
         if "Combined_Score" not in current_lb.columns:
             current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3

+        sort_column = "Combined_Score"
         if method == "WER Only":
-            return current_lb.sort_values("WER")
+            sort_column = "WER"
         elif method == "CER Only":
-            return current_lb.sort_values("CER")
-        else:  # Combined Score
-            return current_lb.sort_values("Combined_Score")
+            sort_column = "CER"
+
+        return add_ranking_numbers(current_lb, sort_column)
+
     except Exception:
-        return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+        return pd.DataFrame(columns=["Rank", "Model_Name", "WER", "CER", "Combined_Score", "timestamp"])

 def process_submission(model_name, csv_file):
     try:
@@ -136,7 +141,6 @@ def process_submission(model_name, csv_file):
             dup_ids = df[df["id"].duplicated()]["id"].unique()
             return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None

-
         missing_ids = set(references.keys()) - set(df["id"])
         extra_ids = set(df["id"]) - set(references.keys())

@@ -146,7 +150,6 @@
         if extra_ids:
             return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None

-
         try:
             avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)

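As context for the two checks above: a submission must cover exactly the reference IDs, no more and no fewer. A small sketch with invented IDs showing how both rejection paths trigger:

import pandas as pd

# Invented reference dict and submission frame, mirroring the checks above
references = {"utt1": "ref text 1", "utt2": "ref text 2"}
df = pd.DataFrame({"id": ["utt1", "utt3"]})

missing_ids = set(references.keys()) - set(df["id"])  # {"utt2"} -> "missing IDs" error
extra_ids = set(df["id"]) - set(references.keys())    # {"utt3"} -> "extra IDs" error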
@@ -160,7 +163,6 @@
             leaderboard = pd.read_csv(leaderboard_file)
             timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

-            # Calculate combined score (70% WER, 30% CER)
             combined_score = avg_wer * 0.7 + avg_cer * 0.3

             new_entry = pd.DataFrame(
@@ -168,10 +170,13 @@
                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
             )

+
             updated_leaderboard = pd.concat([leaderboard, new_entry]).sort_values("Combined_Score")
             updated_leaderboard.to_csv(leaderboard_file, index=False)

-            return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined Score: {combined_score:.4f}", updated_leaderboard
+            ranked_leaderboard = add_ranking_numbers(updated_leaderboard)
+
+            return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined Score: {combined_score:.4f}", ranked_leaderboard

     except Exception as e:
         return f"Error processing submission: {str(e)}", None
@@ -194,9 +199,10 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
         if "Combined_Score" not in current_leaderboard.columns:
             current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3

-        current_leaderboard = current_leaderboard.sort_values("Combined_Score")
+
+        current_leaderboard = add_ranking_numbers(current_leaderboard.sort_values("Combined_Score"))
     except Exception:
-        current_leaderboard = pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+        current_leaderboard = pd.DataFrame(columns=["Rank", "Model_Name", "WER", "CER", "Combined_Score", "timestamp"])

     gr.Markdown("### Current ASR Model Rankings")

@@ -256,4 +262,4 @@
     )

 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch()
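For reference, the scoring this commit keeps using: the leaderboard sorts by Combined_Score = 0.7 * WER + 0.3 * CER, and calculate_metrics additionally returns length-weighted averages in which each sample counts in proportion to its reference length. A worked sketch with invented per-sample numbers:

# Invented per-sample results in the shape calculate_metrics aggregates
results = [
    {"wer": 0.50, "cer": 0.20, "ref_word_count": 10, "ref_char_count": 50},
    {"wer": 0.10, "cer": 0.05, "ref_word_count": 30, "ref_char_count": 150},
]
total_ref_words = sum(r["ref_word_count"] for r in results)  # 40
total_ref_chars = sum(r["ref_char_count"] for r in results)  # 200

avg_wer = sum(r["wer"] for r in results) / len(results)  # (0.50 + 0.10) / 2 = 0.30
avg_cer = sum(r["cer"] for r in results) / len(results)  # (0.20 + 0.05) / 2 = 0.125

# Long samples dominate the weighted figures: (0.50*10 + 0.10*30) / 40 = 0.20
weighted_wer = sum(r["wer"] * r["ref_word_count"] for r in results) / total_ref_words
# (0.20*50 + 0.05*150) / 200 = (10 + 7.5) / 200 = 0.0875
weighted_cer = sum(r["cer"] * r["ref_char_count"] for r in results) / total_ref_chars

combined_score = avg_wer * 0.7 + avg_cer * 0.3  # 0.21 + 0.0375 = 0.2475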