sudoping01 committed
Commit c726970 · verified · 1 Parent(s): 3769468

Update app.py

Files changed (1)
  1. app.py +73 -114
app.py CHANGED
@@ -7,16 +7,13 @@ from datetime import datetime
 import re
 
 # Load the Bambara ASR dataset
-print("Loading dataset...")
 dataset = load_dataset("sudoping01/bambara-asr-benchmark", name="default")["train"]
 references = {row["id"]: row["text"] for row in dataset}
 
-# Load or initialize the leaderboard
+# Initialize leaderboard file if it doesn't exist
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
-else:
-    print(f"Loaded existing leaderboard with {len(pd.read_csv(leaderboard_file))} entries")
 
 def normalize_text(text):
     """
@@ -28,132 +25,66 @@ def normalize_text(text):
     """
     if not isinstance(text, str):
         text = str(text)
-
-    # Convert to lowercase
     text = text.lower()
-
-    # Remove punctuation, keeping spaces
     text = re.sub(r'[^\w\s]', '', text)
-
-    # Normalize whitespace
     text = re.sub(r'\s+', ' ', text).strip()
-
     return text
 
 def calculate_metrics(predictions_df):
-    """Calculate WER and CER for predictions."""
+    """Calculate WER and CER for predictions against reference dataset."""
     results = []
-
     for _, row in predictions_df.iterrows():
         id_val = row["id"]
         if id_val not in references:
-            print(f"Warning: ID {id_val} not found in references")
             continue
-
         reference = normalize_text(references[id_val])
         hypothesis = normalize_text(row["text"])
-
-        # Print detailed info for first few entries
-        if len(results) < 5:
-            print(f"ID: {id_val}")
-            print(f"Reference: '{reference}'")
-            print(f"Hypothesis: '{hypothesis}'")
-
-        # Skip empty strings
         if not reference or not hypothesis:
-            print(f"Warning: Empty reference or hypothesis for ID {id_val}")
             continue
-
-        # Split into words for jiwer
-        reference_words = reference.split()
-        hypothesis_words = hypothesis.split()
-
-        if len(results) < 5:
-            print(f"Reference words: {reference_words}")
-            print(f"Hypothesis words: {hypothesis_words}")
-
-        # Calculate metrics
         try:
-            # Make sure we're not comparing identical strings
-            if reference == hypothesis:
-                print(f"Warning: Identical strings for ID {id_val}")
-                # Force a small difference if the strings are identical
-                # This is for debugging - remove in production if needed
-                if len(hypothesis_words) > 0:
-                    # Add a dummy word to force non-zero WER
-                    hypothesis_words.append("dummy_debug_token")
-                    hypothesis = " ".join(hypothesis_words)
-
-            # Calculate WER and CER
             sample_wer = wer(reference, hypothesis)
             sample_cer = cer(reference, hypothesis)
-
-            if len(results) < 5:
-                print(f"WER: {sample_wer}, CER: {sample_cer}")
-
             results.append({
                 "id": id_val,
-                "reference": reference,
-                "hypothesis": hypothesis,
                 "wer": sample_wer,
                 "cer": sample_cer
             })
-        except Exception as e:
-            print(f"Error calculating metrics for ID {id_val}: {str(e)}")
-
+        except Exception:
+            pass  # Skip invalid samples silently
     if not results:
-        raise ValueError("No valid samples for WER/CER calculation")
-
-    # Calculate average metrics
+        raise ValueError("No valid samples available for metric calculation")
     avg_wer = sum(item["wer"] for item in results) / len(results)
     avg_cer = sum(item["cer"] for item in results) / len(results)
-
     return avg_wer, avg_cer, results
 
 def process_submission(submitter_name, csv_file):
+    """Process the uploaded CSV, calculate metrics, and update the leaderboard."""
     try:
-        # Read and validate the uploaded CSV
         df = pd.read_csv(csv_file)
-        print(f"Processing submission from {submitter_name} with {len(df)} rows")
-
         if len(df) == 0:
-            return "Error: Uploaded CSV is empty.", None
-
+            return "Submission failed: The uploaded CSV file is empty. Please upload a valid CSV file with predictions.", None
         if set(df.columns) != {"id", "text"}:
-            return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
-
+            return f"Submission failed: The CSV file must contain exactly two columns: 'id' and 'text'. Found: {', '.join(df.columns)}", None
         if df["id"].duplicated().any():
-            dup_ids = df[df["id"].duplicated()]["id"].unique()
-            return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
-
-        # Check if IDs match the reference dataset
+            dup_ids = df[df["id"].duplicated(keep=False)]["id"].unique()
+            return f"Submission failed: Duplicate 'id' values detected: {', '.join(map(str, dup_ids[:5]))}", None
         missing_ids = set(references.keys()) - set(df["id"])
        extra_ids = set(df["id"]) - set(references.keys())
-
         if missing_ids:
-            return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
-
+            return f"Submission failed: Missing {len(missing_ids)} required 'id' values. First few: {', '.join(map(str, list(missing_ids)[:5]))}", None
         if extra_ids:
-            return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
+            return f"Submission failed: Found {len(extra_ids)} unrecognized 'id' values. First few: {', '.join(map(str, list(extra_ids)[:5]))}", None
+        empty_ids = [row["id"] for _, row in df.iterrows() if not normalize_text(row["text"])]
+        if empty_ids:
+            return f"Submission failed: Empty transcriptions detected for {len(empty_ids)} 'id' values. First few: {', '.join(map(str, empty_ids[:5]))}", None
 
-        # Calculate WER and CER
-        try:
-            avg_wer, avg_cer, detailed_results = calculate_metrics(df)
-
-            # Debug information
-            print(f"Calculated metrics - WER: {avg_wer:.4f}, CER: {avg_cer:.4f}")
-            print(f"Processed {len(detailed_results)} valid samples")
-
-            # Check for suspiciously low values
-            if avg_wer < 0.001:
-                print("WARNING: WER is extremely low - likely an error")
-                return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
-
-        except Exception as e:
-            print(f"Error in metrics calculation: {str(e)}")
-            return f"Error calculating metrics: {str(e)}", None
+        # Calculate metrics
+        avg_wer, avg_cer, detailed_results = calculate_metrics(df)
+        n_valid = len(detailed_results)
+        if n_valid == 0:
+            return "Submission failed: No valid samples found for metric calculation.", None
 
-        # Update the leaderboard
+        # Update leaderboard
         leaderboard = pd.read_csv(leaderboard_file)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         new_entry = pd.DataFrame(
@@ -163,46 +94,74 @@ def process_submission(submitter_name, csv_file):
         leaderboard = pd.concat([leaderboard, new_entry]).sort_values("WER")
         leaderboard.to_csv(leaderboard_file, index=False)
 
-        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}", leaderboard
+        # Format leaderboard for display
+        display_leaderboard = leaderboard.copy()
+        display_leaderboard["WER"] = display_leaderboard["WER"].apply(lambda x: f"{x:.4f}")
+        display_leaderboard["CER"] = display_leaderboard["CER"].apply(lambda x: f"{x:.4f}")
 
+        return f"Your submission has been successfully processed. Evaluated {n_valid} valid samples. WER: {avg_wer:.4f}, CER: {avg_cer:.4f}", display_leaderboard
+
     except Exception as e:
-        print(f"Error processing submission: {str(e)}")
-        return f"Error processing submission: {str(e)}", None
+        return f"Submission failed: An error occurred while processing your file - {str(e)}", None
 
-# Create the Gradio interface
-with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
+def load_and_format_leaderboard():
+    """Load the leaderboard and format WER/CER for display."""
+    if os.path.exists(leaderboard_file):
+        leaderboard = pd.read_csv(leaderboard_file)
+        leaderboard["WER"] = leaderboard["WER"].apply(lambda x: f"{x:.4f}")
+        leaderboard["CER"] = leaderboard["CER"].apply(lambda x: f"{x:.4f}")
+        return leaderboard
+    return pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"])
+
+# Gradio interface
+with gr.Blocks(title="Bambara ASR Benchmark Leaderboard") as demo:
     gr.Markdown(
         """
-        # Bambara ASR Leaderboard
-        Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
-        The 'id's must match those in the dataset.
-        [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
-        - **WER**: Word Error Rate (lower is better).
-        - **CER**: Character Error Rate (lower is better).
+        ## Bambara ASR Benchmark Leaderboard
+
+        **Welcome to the Bambara Automatic Speech Recognition (ASR) Benchmark Leaderboard**
+        Evaluate your ASR model's performance on the Bambara language dataset.
+
+        ### Submission Instructions
+        1. Prepare a CSV file with two columns:
+           - **`id`**: Must match identifiers in the official dataset.
+           - **`text`**: Your model's transcription predictions.
+        2. Ensure the CSV file meets these requirements:
+           - Contains only `id` and `text` columns.
+           - No duplicate `id` values.
+           - All `id` values match dataset entries.
+        3. Upload your CSV file below.
+
+        ### Dataset
+        Access the official dataset: [Bambara ASR Dataset](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset)
+
+        ### Evaluation Metrics
+        - **Word Error Rate (WER)**: Word-level transcription accuracy (lower is better).
+        - **Character Error Rate (CER)**: Character-level accuracy (lower is better).
+
+        ### Leaderboard
+        Submissions are ranked by WER and include:
+        - Submitter name
+        - WER (4 decimal places)
+        - CER (4 decimal places)
+        - Submission timestamp
         """
     )
-
     with gr.Row():
-        submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
-        csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
-
-    submit_btn = gr.Button("Submit")
-    output_msg = gr.Textbox(label="Status", interactive=False)
+        submitter = gr.Textbox(label="Submitter Name or Model Identifier", placeholder="e.g., MALIBA-AI/asr")
+        csv_upload = gr.File(label="Upload Prediction CSV File", file_types=[".csv"])
+    submit_btn = gr.Button("Evaluate Submission")
+    output_msg = gr.Textbox(label="Submission Status", interactive=False)
     leaderboard_display = gr.DataFrame(
-        label="Leaderboard",
-        value=pd.read_csv(leaderboard_file),
+        label="Current Leaderboard",
+        value=load_and_format_leaderboard(),
         interactive=False
     )
-
     submit_btn.click(
         fn=process_submission,
         inputs=[submitter, csv_upload],
         outputs=[output_msg, leaderboard_display]
     )
 
-# Print startup message
-print("Starting Bambara ASR Leaderboard app...")
-
-# Launch the app
 if __name__ == "__main__":
     demo.launch(share=True)
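
For reference, the updated app expects a submission CSV with exactly two columns, `id` and `text`, whose `id` values cover the benchmark dataset and whose transcriptions are non-empty. The sketch below shows one way such a file could be produced; it is illustrative only and not part of this commit: the `transcribe` placeholder and the assumed `audio` field of each dataset row are hypothetical stand-ins for your own model and data access.

# Illustrative sketch (not from this commit): build a leaderboard submission CSV.
# Assumptions: dataset rows expose an "audio" field, and transcribe() stands in
# for your own ASR model's inference call.
import pandas as pd
from datasets import load_dataset

def transcribe(audio) -> str:
    # Replace with real model inference; must return a non-empty transcription.
    return "i ni ce"  # placeholder text

dataset = load_dataset("sudoping01/bambara-asr-benchmark", name="default")["train"]
rows = [{"id": row["id"], "text": transcribe(row.get("audio"))} for row in dataset]
pd.DataFrame(rows, columns=["id", "text"]).to_csv("submission.csv", index=False)

A file built this way passes the column, duplicate-id, id-coverage, and empty-transcription checks that process_submission performs before computing WER and CER.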