huckiyang committed on
Commit
381227f
·
1 Parent(s): c7f8633

Optimize the data code

Browse files
Files changed (2) hide show
  1. README.md +9 -0
  2. app.py +19 -7
README.md CHANGED
@@ -42,3 +42,12 @@ Word Error Rate is calculated between:
42
  Lower WER values indicate better transcription accuracy.
43
 
44
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
42
  Lower WER values indicate better transcription accuracy.
43
 
44
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
45
+
46
+ ## Table Structure
47
+
48
+ The leaderboard is displayed as a table with:
49
+
50
+ - **Rows**: "Number of Examples" and "Word Error Rate (WER)"
51
+ - **Columns**: Different data sources (CHiME4, CORAAL, CommonVoice, etc.) and OVERALL
52
+
53
+ Each cell shows the corresponding metric for that specific data source. The OVERALL column shows aggregate metrics across all sources.
app.py CHANGED
@@ -259,12 +259,18 @@ def get_wer_metrics(dataset):
259
 
260
  # Create a transposed DataFrame with metrics as rows and sources as columns
261
  metrics = ["Count", "No LM Baseline"]
262
- result_df = pd.DataFrame(index=metrics, columns=all_sources + ["OVERALL"])
 
 
 
263
 
264
  for source in all_sources + ["OVERALL"]:
265
  for metric in metrics:
266
  result_df.loc[metric, source] = source_results[source][metric]
267
 
 
 
 
268
  return result_df
269
 
270
  except Exception as e:
@@ -278,17 +284,23 @@ def format_dataframe(df):
278
  # Use vectorized operations instead of apply
279
  df = df.copy()
280
 
281
- # Format WER values
282
- if "No LM Baseline" in df.index:
 
 
 
 
 
 
283
  # Convert to object type first to avoid warnings
284
- df.loc["No LM Baseline"] = df.loc["No LM Baseline"].astype(object)
285
 
286
  for col in df.columns:
287
- value = df.loc["No LM Baseline", col]
288
  if pd.notna(value):
289
- df.loc["No LM Baseline", col] = f"{value:.4f}"
290
  else:
291
- df.loc["No LM Baseline", col] = "N/A"
292
 
293
  return df
294
 
 
259
 
260
  # Create a transposed DataFrame with metrics as rows and sources as columns
261
  metrics = ["Count", "No LM Baseline"]
262
+ result_df = pd.DataFrame(index=metrics, columns=["Metric"] + all_sources + ["OVERALL"])
263
+
264
+ # Add descriptive column
265
+ result_df["Metric"] = ["Number of Examples", "Word Error Rate (WER)"]
266
 
267
  for source in all_sources + ["OVERALL"]:
268
  for metric in metrics:
269
  result_df.loc[metric, source] = source_results[source][metric]
270
 
271
+ # Set Metric as index for better display
272
+ result_df = result_df.set_index("Metric")
273
+
274
  return result_df
275
 
276
  except Exception as e:
 
284
  # Use vectorized operations instead of apply
285
  df = df.copy()
286
 
287
+ # Find the row containing WER values (now with new index name)
288
+ wer_row_index = None
289
+ for idx in df.index:
290
+ if "WER" in idx or "Error Rate" in idx:
291
+ wer_row_index = idx
292
+ break
293
+
294
+ if wer_row_index:
295
  # Convert to object type first to avoid warnings
296
+ df.loc[wer_row_index] = df.loc[wer_row_index].astype(object)
297
 
298
  for col in df.columns:
299
+ value = df.loc[wer_row_index, col]
300
  if pd.notna(value):
301
+ df.loc[wer_row_index, col] = f"{value:.4f}"
302
  else:
303
+ df.loc[wer_row_index, col] = "N/A"
304
 
305
  return df
306