sudoping01 committed
Commit 2e23fb2 · verified · 1 Parent(s): 5f3b2ed

Update app.py

Files changed (1)
  1. app.py +19 -100
app.py CHANGED
@@ -8,65 +8,25 @@ import re
 
 from huggingface_hub import login
 
-# Authentication setup
+
 token = os.environ.get("HG_TOKEN")
-print(f"Token exists: {token is not None}")
-if token:
-    print(f"Token length: {len(token)}")
-    print(f"Token first few chars: {token[:4]}...")
 login(token)
 
 print("Loading dataset...")
-try:
-    # Try loading without use_auth_token parameter since it's not accepted
-    dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
-    print(f"Successfully loaded dataset with {len(dataset)} samples")
-    references = {row["id"]: row["text"] for row in dataset}
-except Exception as e:
-    print(f"Error loading dataset: {str(e)}")
-    try:
-        # Second attempt with token passed differently
-        from huggingface_hub import HfApi
-        api = HfApi(token=token)
-        dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
-        print(f"Successfully loaded dataset with {len(dataset)} samples")
-        references = {row["id"]: row["text"] for row in dataset}
-    except Exception as e2:
-        print(f"Second attempt error: {str(e2)}")
-        # Fallback in case dataset can't be loaded
-        references = {}
-        print("WARNING: Using empty references dictionary due to dataset loading error")
+dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
+references = {row["id"]: row["text"] for row in dataset}
 
-# Initialize leaderboard file with combined score
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
-    # Create empty leaderboard with necessary columns
     pd.DataFrame(columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
-    print("Created new leaderboard file")
-
-    # Add example entries so first-time visitors see something
-    example_data = [
-        ["Example Model 1", 0.35, 0.20, 0.305, "2023-01-01 00:00:00"],
-        ["Example Model 2", 0.40, 0.18, 0.334, "2023-01-02 00:00:00"],
-        ["Example Model 3", 0.32, 0.25, 0.299, "2023-01-03 00:00:00"]
-    ]
-    example_df = pd.DataFrame(
-        example_data,
-        columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]
-    )
-    example_df.to_csv(leaderboard_file, index=False)
-    print("Added example data to empty leaderboard for demonstration")
+
 else:
-    # Load existing leaderboard
+
     leaderboard_df = pd.read_csv(leaderboard_file)
 
-    # Add Combined_Score column if it doesn't exist
     if "Combined_Score" not in leaderboard_df.columns:
-        leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
+        leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3 # WER 70% and CER 30%
         leaderboard_df.to_csv(leaderboard_file, index=False)
-        print("Added Combined_Score column to existing leaderboard")
-
-    print(f"Loaded existing leaderboard with {len(leaderboard_df)} entries")
 
 def normalize_text(text):
     """
@@ -79,15 +39,9 @@ def normalize_text(text):
     if not isinstance(text, str):
         text = str(text)
 
-    # Convert to lowercase
     text = text.lower()
-
-    # Remove punctuation, keeping spaces
     text = re.sub(r'[^\w\s]', '', text)
-
-    # Normalize whitespace
     text = re.sub(r'\s+', ' ', text).strip()
-
     return text
 
 def calculate_metrics(predictions_df):
@@ -99,48 +53,32 @@ def calculate_metrics(predictions_df):
     for _, row in predictions_df.iterrows():
         id_val = row["id"]
         if id_val not in references:
-            print(f"Warning: ID {id_val} not found in references")
             continue
 
         reference = normalize_text(references[id_val])
         hypothesis = normalize_text(row["text"])
 
-        # Print detailed info for first few entries
-        if len(results) < 5:
-            print(f"ID: {id_val}")
-            print(f"Reference: '{reference}'")
-            print(f"Hypothesis: '{hypothesis}'")
 
-        # Skip empty strings
         if not reference or not hypothesis:
             print(f"Warning: Empty reference or hypothesis for ID {id_val}")
             continue
 
-        # Split into words for jiwer
         reference_words = reference.split()
         hypothesis_words = hypothesis.split()
        reference_chars = list(reference)
 
-        if len(results) < 5:
-            print(f"Reference words: {reference_words}")
-            print(f"Hypothesis words: {hypothesis_words}")
-
-        # Calculate metrics
         try:
-            # Calculate WER and CER
+
             sample_wer = wer(reference, hypothesis)
             sample_cer = cer(reference, hypothesis)
 
-            # Cap metrics at sensible values to prevent outliers
-            sample_wer = min(sample_wer, 2.0) # Cap at 200% WER
-            sample_cer = min(sample_cer, 2.0) # Cap at 200% CER
+            sample_wer = min(sample_wer, 2.0)
+            sample_cer = min(sample_cer, 2.0)
 
-            # For weighted calculations
+
             total_ref_words += len(reference_words)
             total_ref_chars += len(reference_chars)
 
-            if len(results) < 5:
-                print(f"WER: {sample_wer}, CER: {sample_cer}")
 
             results.append({
                 "id": id_val,
@@ -165,19 +103,14 @@ def calculate_metrics(predictions_df):
     weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
     weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
 
-    print(f"Simple average WER: {avg_wer:.4f}, CER: {avg_cer:.4f}")
-    print(f"Weighted average WER: {weighted_wer:.4f}, CER: {weighted_cer:.4f}")
-    print(f"Processed {len(results)} valid samples")
-
     return avg_wer, avg_cer, weighted_wer, weighted_cer, results
 
 def update_ranking(method):
-    """Update leaderboard ranking based on selected method"""
+
     current_lb = pd.read_csv(leaderboard_file)
 
-    # Calculate combined score if not present
     if "Combined_Score" not in current_lb.columns:
-        current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3
+        current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3 # 70% for WER
 
     if method == "WER Only":
         return current_lb.sort_values("WER")
@@ -188,7 +121,7 @@ def update_ranking(method):
 
 def process_submission(submitter_name, csv_file):
     try:
-        # Read and validate the uploaded CSV
+
         df = pd.read_csv(csv_file)
         print(f"Processing submission from {submitter_name} with {len(df)} rows")
 
@@ -216,25 +149,19 @@ def process_submission(submitter_name, csv_file):
         try:
             avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
 
-            # Debug information
-            print(f"Calculated metrics - WER: {avg_wer:.4f}, CER: {avg_cer:.4f}")
-            print(f"Weighted metrics - WER: {weighted_wer:.4f}, CER: {weighted_cer:.4f}")
-            print(f"Processed {len(detailed_results)} valid samples")
+
 
-            # Check for suspiciously low values
+            # suspiciously low values
             if avg_wer < 0.001:
-                print("WARNING: WER is extremely low - likely an error")
                 return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
 
         except Exception as e:
-            print(f"Error in metrics calculation: {str(e)}")
            return f"Error calculating metrics: {str(e)}", None
 
-        # Update the leaderboard
         leaderboard = pd.read_csv(leaderboard_file)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
-        # Calculate combined score (70% WER, 30% CER)
+        # (70% WER, 30% CER)
         combined_score = avg_wer * 0.7 + avg_cer * 0.3
 
         new_entry = pd.DataFrame(
@@ -242,17 +169,15 @@ def process_submission(submitter_name, csv_file):
            columns=["submitter", "WER", "CER", "Combined_Score", "timestamp"]
        )
 
-        # Add new entry to leaderboard
        updated_leaderboard = pd.concat([leaderboard, new_entry]).sort_values("Combined_Score")
        updated_leaderboard.to_csv(leaderboard_file, index=False)
 
        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined Score: {combined_score:.4f}", updated_leaderboard
 
    except Exception as e:
-        print(f"Error processing submission: {str(e)}")
        return f"Error processing submission: {str(e)}", None
 
-# Create the Gradio interface
+
 with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
         """
@@ -263,22 +188,17 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
         """
     )
 
-    # Load and display current leaderboard immediately
     with gr.Tabs() as tabs:
         with gr.TabItem("🏅 Current Rankings"):
-            # Show current leaderboard rankings
            current_leaderboard = pd.read_csv(leaderboard_file)
 
-            # Calculate combined score if not present
            if "Combined_Score" not in current_leaderboard.columns:
                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
 
-            # Sort by combined score
            current_leaderboard = current_leaderboard.sort_values("Combined_Score")
 
            gr.Markdown("### Current ASR Model Rankings")
 
-            # Add radio buttons for ranking method
            ranking_method = gr.Radio(
                ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
                label="Ranking Method",
@@ -291,7 +211,6 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                label="Models are ranked by selected metric - lower is better"
            )
 
-            # Update leaderboard based on ranking method selection
            ranking_method.change(
                fn=update_ranking,
                inputs=[ranking_method],
@@ -335,9 +254,9 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                outputs=[output_msg, leaderboard_display]
            )
 
-# Print startup message
+
 print("Starting Bambara ASR Leaderboard app...")
 
-# Launch the app
+
 if __name__ == "__main__":
     demo.launch(share=True)
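
For context (not part of the commit): a minimal, self-contained sketch of the scoring path that the simplified app.py implements. A submission CSV needs `id` and `text` columns; each hypothesis is normalized, compared against the benchmark reference with jiwer, capped at 200%, and ranked by the 70%/30% WER/CER combined score. The reference strings, submission rows, and typo below are purely illustrative.

```python
import re

import pandas as pd
from jiwer import cer, wer


def normalize_text(text: str) -> str:
    # Same normalization as app.py: lowercase, strip punctuation, collapse whitespace.
    text = str(text).lower()
    text = re.sub(r"[^\w\s]", "", text)
    return re.sub(r"\s+", " ", text).strip()


# Hypothetical references keyed by utterance id; the app loads these from the
# "eval" split of sudoping01/bambara-speech-recognition-benchmark.
references = {
    "utt_001": "Example reference one.",
    "utt_002": "Example reference two.",
}

# A submission provides the same ids plus the model's transcriptions.
submission = pd.DataFrame(
    {
        "id": ["utt_001", "utt_002"],
        "text": ["example reference one", "example refrence two"],
    }
)

wers, cers = [], []
for _, row in submission.iterrows():
    ref = normalize_text(references[row["id"]])
    hyp = normalize_text(row["text"])
    if ref and hyp:
        wers.append(min(wer(ref, hyp), 2.0))  # capped at 200%, as in app.py
        cers.append(min(cer(ref, hyp), 2.0))

avg_wer = sum(wers) / len(wers)
avg_cer = sum(cers) / len(cers)
combined = avg_wer * 0.7 + avg_cer * 0.3  # leaderboard ranking score
print(f"WER: {avg_wer:.4f}, CER: {avg_cer:.4f}, Combined: {combined:.4f}")
```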