huckiyang committed on
Commit
d9795b9
·
1 Parent(s): 44ea2d4

optimize the data loading

Browse files
Files changed (1) hide show
  1. app.py +89 -94
app.py CHANGED
@@ -37,114 +37,107 @@ def preprocess_text(text):
37
  text = re.sub(r'\s+', ' ', text).strip()
38
  return text
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Calculate WER for a group of examples
41
  def calculate_wer(examples):
42
  if not examples:
43
  return 0.0
44
 
45
  try:
46
- # First, let's examine the first example in detail
47
- if examples and len(examples) > 0:
 
 
 
48
  example = examples[0]
49
- print("\n===== EXAMPLE DATA INSPECTION =====")
50
- print(f"Keys in example: {example.keys()}")
51
-
52
- # Try different possible field names
53
- possible_reference_fields = ["transcription", "reference", "ground_truth", "target"]
54
- possible_hypothesis_fields = ["input1", "hypothesis", "asr_output", "source_text"]
55
-
56
- for field in possible_reference_fields:
57
- if field in example:
58
- print(f"Reference field '{field}' found with value: {str(example[field])[:100]}...")
59
 
60
- for field in possible_hypothesis_fields:
61
- if field in example:
62
- print(f"Hypothesis field '{field}' found with value: {str(example[field])[:100]}...")
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Filter valid examples in a single pass
65
- valid_pairs = []
66
 
67
- for ex in examples:
 
 
 
 
 
 
 
 
68
  try:
69
- # First try the expected field names
70
- if "transcription" in ex and "input1" in ex:
71
- reference = ex["transcription"]
72
- hypothesis = ex["input1"]
73
- # Try alternate field pairs if the standard ones don't exist
74
- elif "transcription" in ex and "hypothesis_concatenated" in ex and ex["hypothesis_concatenated"]:
75
- reference = ex["transcription"]
76
- hypothesis = ex["hypothesis_concatenated"].split('.')[0] # Take first sentence
77
- elif "reference" in ex and "hypothesis" in ex:
78
- reference = ex["reference"]
79
- hypothesis = ex["hypothesis"]
80
- else:
81
- continue # Skip this example if we can't find matching fields
82
 
83
- # Clean and preprocess the text
84
- reference = preprocess_text(reference)
85
- hypothesis = preprocess_text(hypothesis)
86
 
87
- # Only add if both have valid content
 
 
 
 
88
  if reference and hypothesis:
89
- valid_pairs.append((reference, hypothesis))
 
90
  except Exception as ex_error:
91
  print(f"Error processing example: {str(ex_error)}")
92
  continue
93
 
94
- if not valid_pairs:
 
95
  print("No valid pairs found for WER calculation")
96
  return np.nan
97
 
98
- # Print sample pairs for debugging
99
- print(f"\nSample pair for WER calculation:")
100
- print(f"Reference: '{valid_pairs[0][0]}'")
101
- print(f"Hypothesis: '{valid_pairs[0][1]}'")
102
- print(f"Total valid pairs: {len(valid_pairs)}")
103
-
104
- # Make sure we have enough valid examples
105
- if len(valid_pairs) < 5:
106
- print("WARNING: Very few valid pairs for WER calculation")
107
- if len(valid_pairs) < 2:
108
- print("Not enough data for reliable WER calculation")
109
- return np.nan
110
-
111
- # Unzip the pairs
112
- references, hypotheses = zip(*valid_pairs)
113
-
114
- # Calculate WER with additional transforms
115
- try:
116
- # Set up transformation pipeline for jiwer
117
- transformation = jiwer.Compose([
118
- jiwer.ToLowerCase(),
119
- jiwer.RemoveMultipleSpaces(),
120
- jiwer.Strip(),
121
- jiwer.RemovePunctuation(),
122
- jiwer.ReduceToListOfWords()
123
- ])
124
-
125
- # Calculate WER with transformations
126
- wer = jiwer.wer(
127
- references,
128
- hypotheses,
129
- truth_transform=transformation,
130
- hypothesis_transform=transformation
131
- )
132
-
133
- print(f"Successfully calculated WER: {wer}")
134
- return wer
135
- except Exception as wer_error:
136
- print(f"Error calculating WER with jiwer: {str(wer_error)}")
137
-
138
- # Fallback: Calculate character error rate manually for one sample
139
- try:
140
- if valid_pairs:
141
- ref = valid_pairs[0][0]
142
- hyp = valid_pairs[0][1]
143
- distance = jiwer.transforms.cer(ref, hyp)
144
- print(f"Fallback CER for first sample: {distance}")
145
- return np.nan
146
- except:
147
- return np.nan
148
 
149
  except Exception as e:
150
  print(f"Error in calculate_wer: {str(e)}")
@@ -163,14 +156,14 @@ def get_wer_metrics(dataset):
163
  examples_by_source = {}
164
 
165
  # Process all examples
166
- for ex in dataset:
167
  try:
168
  source = ex.get("source", "unknown")
169
  if source not in examples_by_source:
170
  examples_by_source[source] = []
171
  examples_by_source[source].append(ex)
172
  except Exception as e:
173
- print(f"Error processing example: {str(e)}")
174
  continue
175
 
176
  # Get all unique sources
@@ -186,7 +179,7 @@ def get_wer_metrics(dataset):
186
 
187
  if count > 0:
188
  print(f"\nCalculating WER for source {source} with {count} examples")
189
- wer = calculate_wer(examples[:100]) # Start with a sample for debugging
190
  else:
191
  wer = np.nan
192
 
@@ -207,9 +200,10 @@ def get_wer_metrics(dataset):
207
  try:
208
  total_count = len(dataset)
209
  print(f"\nCalculating overall WER with a sample of examples")
210
- # Use a sample for overall calculation to avoid overloading
211
- sample_size = min(1000, total_count)
212
- overall_wer = calculate_wer(dataset[:sample_size])
 
213
 
214
  results.append({
215
  "Source": "OVERALL",
@@ -218,6 +212,7 @@ def get_wer_metrics(dataset):
218
  })
219
  except Exception as e:
220
  print(f"Error calculating overall metrics: {str(e)}")
 
221
  results.append({
222
  "Source": "OVERALL",
223
  "Count": len(dataset),
@@ -294,4 +289,4 @@ with gr.Blocks(title="ASR Text Correction Test Leaderboard") as demo:
294
  refresh_btn.click(refresh_and_report, outputs=[leaderboard, error_output])
295
 
296
  if __name__ == "__main__":
297
- demo.launch()
 
37
  text = re.sub(r'\s+', ' ', text).strip()
38
  return text
39
 
40
# Simple WER calculation
def calculate_simple_wer(reference, hypothesis):
    """Calculate the word error rate (WER) between two strings.

    WER = (word-level Levenshtein distance) / (number of reference words).

    Args:
        reference: ground-truth transcription text.
        hypothesis: ASR output text to score against the reference.

    Returns:
        A float >= 0.0; returns 1.0 (maximum error) when either input is
        empty or the reference contains no words.
    """
    if not reference or not hypothesis:
        return 1.0  # Maximum error if either is empty

    # Split into words
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    if len(ref_words) == 0:
        return 1.0

    # Word-level Levenshtein distance via the classic dynamic-programming
    # recurrence, keeping only the previous row (O(len(hyp_words)) memory).
    # Implemented locally because the previously imported
    # jiwer.measures._levenshtein_distance is a private helper that is not
    # part of jiwer's public API and is absent in recent releases, which
    # made this function raise ImportError at call time.
    prev_row = list(range(len(hyp_words) + 1))
    for i, ref_w in enumerate(ref_words, start=1):
        curr_row = [i]
        for j, hyp_w in enumerate(hyp_words, start=1):
            substitution_cost = 0 if ref_w == hyp_w else 1
            curr_row.append(min(
                prev_row[j] + 1,                     # deletion
                curr_row[j - 1] + 1,                 # insertion
                prev_row[j - 1] + substitution_cost  # substitution / match
            ))
        prev_row = curr_row
    distance = prev_row[-1]

    # WER calculation
    return float(distance) / float(len(ref_words))
59
+
60
  # Calculate WER for a group of examples
61
  def calculate_wer(examples):
62
  if not examples:
63
  return 0.0
64
 
65
  try:
66
+ # Check if examples is a Dataset or a list
67
+ is_dataset = hasattr(examples, 'features')
68
+
69
+ # Get the first example for inspection
70
+ if is_dataset and len(examples) > 0:
71
  example = examples[0]
72
+ elif not is_dataset and len(examples) > 0:
73
+ example = examples[0]
74
+ else:
75
+ print("No examples found")
76
+ return np.nan
 
 
 
 
 
77
 
78
+ print("\n===== EXAMPLE DATA INSPECTION =====")
79
+ print(f"Keys in example: {example.keys()}")
80
+
81
+ # Try different possible field names
82
+ possible_reference_fields = ["transcription", "reference", "ground_truth", "target"]
83
+ possible_hypothesis_fields = ["input1", "hypothesis", "asr_output", "source_text"]
84
+
85
+ for field in possible_reference_fields:
86
+ if field in example:
87
+ print(f"Reference field '{field}' found with value: {str(example[field])[:100]}...")
88
+
89
+ for field in possible_hypothesis_fields:
90
+ if field in example:
91
+ print(f"Hypothesis field '{field}' found with value: {str(example[field])[:100]}...")
92
 
93
+ # Process each example in the dataset
94
+ wer_values = []
95
 
96
+ # Determine how to iterate based on type
97
+ items_to_process = examples
98
+ if is_dataset:
99
+ # Limit to first 200 examples for efficiency
100
+ items_to_process = examples.select(range(min(200, len(examples))))
101
+ else:
102
+ items_to_process = examples[:200] # First 200 examples
103
+
104
+ for ex in items_to_process:
105
  try:
106
+ # Try to get transcription and input1
107
+ transcription = ex.get("transcription")
108
+
109
+ # First try input1, then use first element from hypothesis if available
110
+ input1 = ex.get("input1")
111
+ if input1 is None and "hypothesis" in ex and ex["hypothesis"]:
112
+ if isinstance(ex["hypothesis"], list) and len(ex["hypothesis"]) > 0:
113
+ input1 = ex["hypothesis"][0]
114
+ elif isinstance(ex["hypothesis"], str):
115
+ input1 = ex["hypothesis"]
 
 
 
116
 
117
+ # Skip if either field is missing
118
+ if not transcription or not input1:
119
+ continue
120
 
121
+ # Clean the text
122
+ reference = preprocess_text(transcription)
123
+ hypothesis = preprocess_text(input1)
124
+
125
+ # Calculate WER for this pair
126
  if reference and hypothesis:
127
+ pair_wer = calculate_simple_wer(reference, hypothesis)
128
+ wer_values.append(pair_wer)
129
  except Exception as ex_error:
130
  print(f"Error processing example: {str(ex_error)}")
131
  continue
132
 
133
+ # Calculate average WER
134
+ if not wer_values:
135
  print("No valid pairs found for WER calculation")
136
  return np.nan
137
 
138
+ avg_wer = np.mean(wer_values)
139
+ print(f"Calculated {len(wer_values)} pairs with average WER: {avg_wer:.4f}")
140
+ return avg_wer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  except Exception as e:
143
  print(f"Error in calculate_wer: {str(e)}")
 
156
  examples_by_source = {}
157
 
158
  # Process all examples
159
+ for i, ex in enumerate(dataset):
160
  try:
161
  source = ex.get("source", "unknown")
162
  if source not in examples_by_source:
163
  examples_by_source[source] = []
164
  examples_by_source[source].append(ex)
165
  except Exception as e:
166
+ print(f"Error processing example {i}: {str(e)}")
167
  continue
168
 
169
  # Get all unique sources
 
179
 
180
  if count > 0:
181
  print(f"\nCalculating WER for source {source} with {count} examples")
182
+ wer = calculate_wer(examples) # Now handles both lists and datasets
183
  else:
184
  wer = np.nan
185
 
 
200
  try:
201
  total_count = len(dataset)
202
  print(f"\nCalculating overall WER with a sample of examples")
203
+ # Sample for calculation
204
+ sample_size = min(500, total_count)
205
+ sample_dataset = dataset.select(range(sample_size))
206
+ overall_wer = calculate_wer(sample_dataset)
207
 
208
  results.append({
209
  "Source": "OVERALL",
 
212
  })
213
  except Exception as e:
214
  print(f"Error calculating overall metrics: {str(e)}")
215
+ print(traceback.format_exc())
216
  results.append({
217
  "Source": "OVERALL",
218
  "Count": len(dataset),
 
289
  refresh_btn.click(refresh_and_report, outputs=[leaderboard, error_output])
290
 
291
  if __name__ == "__main__":
292
+ demo.launch()