David Pomerenke committed on
Commit
4973af4
·
1 Parent(s): 8633921

Add ASR ChrF scores

Browse files
Files changed (3) hide show
  1. app.py +11 -1
  2. evals.py +8 -3
  3. results.json +50 -25
app.py CHANGED
@@ -64,13 +64,23 @@ METRICS = {
64
  "display_name": "Automatic Speech Recognition (WER)",
65
  "field_name": "asr_wer",
66
  "label": "WER",
67
- "explanation": """
68
  **Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
69
  It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
70
  transcription into the reference text, divided by the number of words in the reference.
71
  Lower scores indicate better performance, with 0 being perfect transcription.
72
  """,
73
  },
 
 
 
 
 
 
 
 
 
 
74
  }
75
 
76
 
 
64
  "display_name": "Automatic Speech Recognition (WER)",
65
  "field_name": "asr_wer",
66
  "label": "WER",
67
+ "explanation": """
68
  **Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
69
  It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
70
  transcription into the reference text, divided by the number of words in the reference.
71
  Lower scores indicate better performance, with 0 being perfect transcription.
72
  """,
73
  },
74
+ "asr_chrf": {
75
+ "display_name": "Automatic Speech Recognition ChrF",
76
+ "field_name": "asr_chrf",
77
+ "label": "ChrF",
78
+ "explanation": """
79
+ **Automatic Speech Recognition ChrF**: Character n-gram F-score evaluates translations at the character level rather than word level.
80
+ This metric is particularly valuable for morphologically rich languages and can better capture partial word matches.
81
+ Higher scores (0-1) indicate better translations.
82
+ """,
83
+ },
84
  }
85
 
86
 
evals.py CHANGED
@@ -421,11 +421,13 @@ async def transcribe_and_evaluate(model, language_bcp_47, nr):
421
  item = fleurs.iloc[nr]
422
  path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
423
  pred = await transcribe(path, model=model)
424
- score = wer.compute(predictions=[pred], references=[item.transcription])
 
425
  return {
426
  "model": model,
427
  "bcp_47": language["bcp_47"],
428
- "asr_wer": score,
 
429
  "sentence_nr": nr,
430
  }
431
 
@@ -532,12 +534,14 @@ async def main():
532
  if not scores_asr:
533
  continue
534
  asr_wer = mean([s["asr_wer"] for s in scores_asr])
 
535
  results.append(
536
  {
537
  "model": model,
538
  "model_type": "speech-to-text",
539
  "asr_wer": asr_wer,
540
- "overall_score": asr_wer,
 
541
  }
542
  )
543
  if results:
@@ -561,6 +565,7 @@ async def main():
561
  "cls_acc",
562
  "mlm_chrf",
563
  "asr_wer",
 
564
  "overall_score",
565
  ]:
566
  language_results[score] = mean(
 
421
  item = fleurs.iloc[nr]
422
  path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
423
  pred = await transcribe(path, model=model)
424
+ wer_score = wer.compute(predictions=[pred], references=[item.transcription])
425
+ chrf_score = chrf.compute(predictions=[pred], references=[item.transcription])
426
  return {
427
  "model": model,
428
  "bcp_47": language["bcp_47"],
429
+ "asr_wer": wer_score,
430
+ "asr_chrf": chrf_score["score"],
431
  "sentence_nr": nr,
432
  }
433
 
 
534
  if not scores_asr:
535
  continue
536
  asr_wer = mean([s["asr_wer"] for s in scores_asr])
537
+ asr_chrf = mean([s["asr_chrf"] for s in scores_asr])
538
  results.append(
539
  {
540
  "model": model,
541
  "model_type": "speech-to-text",
542
  "asr_wer": asr_wer,
543
+ "asr_chrf": asr_chrf,
544
+ "overall_score": (asr_wer + asr_chrf) / 2,
545
  }
546
  )
547
  if results:
 
565
  "cls_acc",
566
  "mlm_chrf",
567
  "asr_wer",
568
+ "asr_chrf",
569
  "overall_score",
570
  ]:
571
  language_results[score] = mean(
results.json CHANGED
@@ -53,13 +53,15 @@
53
  "model": "elevenlabs/scribe_v1",
54
  "model_type": "speech-to-text",
55
  "asr_wer": 0.34916319968417603,
56
- "overall_score": 0.34916319968417603
 
57
  },
58
  {
59
  "model": "openai/whisper-large-v3",
60
  "model_type": "speech-to-text",
61
  "asr_wer": 0.25418986127300397,
62
- "overall_score": 0.25418986127300397
 
63
  }
64
  ],
65
  "commonvoice_hours": 2651.0,
@@ -227,7 +229,8 @@
227
  "cls_acc": 0.6533333333333333,
228
  "mlm_chrf": 93.48244773503015,
229
  "asr_wer": 0.30167653047859,
230
- "overall_score": 0.6212765331549852
 
231
  },
232
  {
233
  "language_name": "Chinese",
@@ -283,13 +286,15 @@
283
  "model": "elevenlabs/scribe_v1",
284
  "model_type": "speech-to-text",
285
  "asr_wer": 1.0,
286
- "overall_score": 1.0
 
287
  },
288
  {
289
  "model": "openai/whisper-large-v3",
290
  "model_type": "speech-to-text",
291
  "asr_wer": 1.0,
292
- "overall_score": 1.0
 
293
  }
294
  ],
295
  "commonvoice_hours": 422.0,
@@ -322,7 +327,8 @@
322
  "cls_acc": 0.6666666666666666,
323
  "mlm_chrf": 93.14626958884273,
324
  "asr_wer": 1.0,
325
- "overall_score": 0.8236434960611553
 
326
  },
327
  {
328
  "language_name": "Hindi",
@@ -378,13 +384,15 @@
378
  "model": "elevenlabs/scribe_v1",
379
  "model_type": "speech-to-text",
380
  "asr_wer": 0.2338948365728121,
381
- "overall_score": 0.2338948365728121
 
382
  },
383
  {
384
  "model": "openai/whisper-large-v3",
385
  "model_type": "speech-to-text",
386
  "asr_wer": 0.43522263872986894,
387
- "overall_score": 0.43522263872986894
 
388
  }
389
  ],
390
  "commonvoice_hours": 16.0,
@@ -403,7 +411,8 @@
403
  "cls_acc": 0.6333333333333334,
404
  "mlm_chrf": 94.16368134606655,
405
  "asr_wer": 0.33455873765134053,
406
- "overall_score": 0.6035554987690951
 
407
  },
408
  {
409
  "language_name": "Spanish",
@@ -459,13 +468,15 @@
459
  "model": "elevenlabs/scribe_v1",
460
  "model_type": "speech-to-text",
461
  "asr_wer": 0.19653905528613333,
462
- "overall_score": 0.19653905528613333
 
463
  },
464
  {
465
  "model": "openai/whisper-large-v3",
466
  "model_type": "speech-to-text",
467
  "asr_wer": 0.17561491933862197,
468
- "overall_score": 0.17561491933862197
 
469
  }
470
  ],
471
  "commonvoice_hours": 446.0,
@@ -517,7 +528,8 @@
517
  "cls_acc": 0.6733333333333333,
518
  "mlm_chrf": 95.99334001231053,
519
  "asr_wer": 0.18607698731237765,
520
- "overall_score": 0.5651493654109723
 
521
  },
522
  {
523
  "language_name": "Arabic",
@@ -573,13 +585,15 @@
573
  "model": "elevenlabs/scribe_v1",
574
  "model_type": "speech-to-text",
575
  "asr_wer": 0.2685436379713873,
576
- "overall_score": 0.2685436379713873
 
577
  },
578
  {
579
  "model": "openai/whisper-large-v3",
580
  "model_type": "speech-to-text",
581
  "asr_wer": 0.17370718156523782,
582
- "overall_score": 0.17370718156523782
 
583
  }
584
  ],
585
  "commonvoice_hours": 91.0,
@@ -630,7 +644,8 @@
630
  "cls_acc": 0.6733333333333332,
631
  "mlm_chrf": 94.58207181664275,
632
  "asr_wer": 0.22112540976831258,
633
- "overall_score": 0.5717701162636791
 
634
  },
635
  {
636
  "language_name": "Urdu",
@@ -650,7 +665,8 @@
650
  "model": "elevenlabs/scribe_v1",
651
  "model_type": "speech-to-text",
652
  "asr_wer": 0.2982973325975355,
653
- "overall_score": 0.2982973325975355
 
654
  }
655
  ],
656
  "commonvoice_hours": 77.0,
@@ -668,7 +684,8 @@
668
  "cls_acc": 0.43333333333333335,
669
  "mlm_chrf": 94.38802161979918,
670
  "asr_wer": 0.2982973325975355,
671
- "overall_score": 0.457636877070121
 
672
  },
673
  {
674
  "language_name": "French",
@@ -688,7 +705,8 @@
688
  "model": "elevenlabs/scribe_v1",
689
  "model_type": "speech-to-text",
690
  "asr_wer": 0.2610754929736961,
691
- "overall_score": 0.2610754929736961
 
692
  }
693
  ],
694
  "commonvoice_hours": 1052.0,
@@ -763,7 +781,8 @@
763
  "cls_acc": 0.5666666666666667,
764
  "mlm_chrf": 97.12318847922649,
765
  "asr_wer": 0.2610754929736961,
766
- "overall_score": 0.47984683162801145
 
767
  },
768
  {
769
  "language_name": "Bangla",
@@ -783,7 +802,8 @@
783
  "model": "elevenlabs/scribe_v1",
784
  "model_type": "speech-to-text",
785
  "asr_wer": 0.26686188207927336,
786
- "overall_score": 0.26686188207927336
 
787
  }
788
  ],
789
  "commonvoice_hours": 49.0,
@@ -801,7 +821,8 @@
801
  "cls_acc": 0.4,
802
  "mlm_chrf": 90.6067262108039,
803
  "asr_wer": 0.26686188207927336,
804
- "overall_score": 0.4369301366595233
 
805
  },
806
  {
807
  "language_name": "Portuguese",
@@ -821,7 +842,8 @@
821
  "model": "elevenlabs/scribe_v1",
822
  "model_type": "speech-to-text",
823
  "asr_wer": 0.22967756370402836,
824
- "overall_score": 0.22967756370402836
 
825
  }
826
  ],
827
  "commonvoice_hours": 177.0,
@@ -850,7 +872,8 @@
850
  "cls_acc": 0.5666666666666667,
851
  "mlm_chrf": 96.52676764996336,
852
  "asr_wer": 0.22967756370402836,
853
- "overall_score": 0.4611647035169646
 
854
  },
855
  {
856
  "language_name": "Punjabi",
@@ -870,7 +893,8 @@
870
  "model": "elevenlabs/scribe_v1",
871
  "model_type": "speech-to-text",
872
  "asr_wer": 0.20953788908863977,
873
- "overall_score": 0.20953788908863977
 
874
  }
875
  ],
876
  "commonvoice_hours": 2.3,
@@ -889,6 +913,7 @@
889
  "cls_acc": 0.5333333333333333,
890
  "mlm_chrf": 90.10119297923285,
891
  "asr_wer": 0.20953788908863977,
892
- "overall_score": 0.4280026333780908
 
893
  }
894
  ]
 
53
  "model": "elevenlabs/scribe_v1",
54
  "model_type": "speech-to-text",
55
  "asr_wer": 0.34916319968417603,
56
+ "asr_chrf": 78.55986690446153,
57
+ "overall_score": 39.45451505207285
58
  },
59
  {
60
  "model": "openai/whisper-large-v3",
61
  "model_type": "speech-to-text",
62
  "asr_wer": 0.25418986127300397,
63
+ "asr_chrf": 86.52016887049808,
64
+ "overall_score": 43.387179365885544
65
  }
66
  ],
67
  "commonvoice_hours": 2651.0,
 
229
  "cls_acc": 0.6533333333333333,
230
  "mlm_chrf": 93.48244773503015,
231
  "asr_wer": 0.30167653047859,
232
+ "asr_chrf": 82.5400178874798,
233
+ "overall_score": 12.369611012726589
234
  },
235
  {
236
  "language_name": "Chinese",
 
286
  "model": "elevenlabs/scribe_v1",
287
  "model_type": "speech-to-text",
288
  "asr_wer": 1.0,
289
+ "asr_chrf": 70.77419107011707,
290
+ "overall_score": 35.887095535058535
291
  },
292
  {
293
  "model": "openai/whisper-large-v3",
294
  "model_type": "speech-to-text",
295
  "asr_wer": 1.0,
296
+ "asr_chrf": 77.48220275963784,
297
+ "overall_score": 39.24110137981892
298
  }
299
  ],
300
  "commonvoice_hours": 422.0,
 
327
  "cls_acc": 0.6666666666666666,
328
  "mlm_chrf": 93.14626958884273,
329
  "asr_wer": 1.0,
330
+ "asr_chrf": 74.12819691487746,
331
+ "overall_score": 11.270528769615078
332
  },
333
  {
334
  "language_name": "Hindi",
 
384
  "model": "elevenlabs/scribe_v1",
385
  "model_type": "speech-to-text",
386
  "asr_wer": 0.2338948365728121,
387
+ "asr_chrf": 83.13389660250954,
388
+ "overall_score": 41.68389571954118
389
  },
390
  {
391
  "model": "openai/whisper-large-v3",
392
  "model_type": "speech-to-text",
393
  "asr_wer": 0.43522263872986894,
394
+ "asr_chrf": 63.049286642268754,
395
+ "overall_score": 31.74225464049931
396
  }
397
  ],
398
  "commonvoice_hours": 16.0,
 
411
  "cls_acc": 0.6333333333333334,
412
  "mlm_chrf": 94.16368134606655,
413
  "asr_wer": 0.33455873765134053,
414
+ "asr_chrf": 73.09159162238915,
415
+ "overall_score": 10.997417339445926
416
  },
417
  {
418
  "language_name": "Spanish",
 
468
  "model": "elevenlabs/scribe_v1",
469
  "model_type": "speech-to-text",
470
  "asr_wer": 0.19653905528613333,
471
+ "asr_chrf": 89.3473231669277,
472
+ "overall_score": 44.771931111106916
473
  },
474
  {
475
  "model": "openai/whisper-large-v3",
476
  "model_type": "speech-to-text",
477
  "asr_wer": 0.17561491933862197,
478
+ "asr_chrf": 91.85915393819565,
479
+ "overall_score": 46.01738442876714
480
  }
481
  ],
482
  "commonvoice_hours": 446.0,
 
528
  "cls_acc": 0.6733333333333333,
529
  "mlm_chrf": 95.99334001231053,
530
  "asr_wer": 0.18607698731237765,
531
+ "asr_chrf": 90.60323855256166,
532
+ "overall_score": 13.481886731875155
533
  },
534
  {
535
  "language_name": "Arabic",
 
585
  "model": "elevenlabs/scribe_v1",
586
  "model_type": "speech-to-text",
587
  "asr_wer": 0.2685436379713873,
588
+ "asr_chrf": 83.61712973768607,
589
+ "overall_score": 41.94283668782873
590
  },
591
  {
592
  "model": "openai/whisper-large-v3",
593
  "model_type": "speech-to-text",
594
  "asr_wer": 0.17370718156523782,
595
+ "asr_chrf": 88.68973505122798,
596
+ "overall_score": 44.43172111639661
597
  }
598
  ],
599
  "commonvoice_hours": 91.0,
 
644
  "cls_acc": 0.6733333333333332,
645
  "mlm_chrf": 94.58207181664275,
646
  "asr_wer": 0.22112540976831258,
647
+ "asr_chrf": 86.15343239445703,
648
+ "overall_score": 12.84781397121921
649
  },
650
  {
651
  "language_name": "Urdu",
 
665
  "model": "elevenlabs/scribe_v1",
666
  "model_type": "speech-to-text",
667
  "asr_wer": 0.2982973325975355,
668
+ "asr_chrf": 78.04754030614318,
669
+ "overall_score": 39.17291881937036
670
  }
671
  ],
672
  "commonvoice_hours": 77.0,
 
684
  "cls_acc": 0.43333333333333335,
685
  "mlm_chrf": 94.38802161979918,
686
  "asr_wer": 0.2982973325975355,
687
+ "asr_chrf": 78.04754030614318,
688
+ "overall_score": 19.894947620456534
689
  },
690
  {
691
  "language_name": "French",
 
705
  "model": "elevenlabs/scribe_v1",
706
  "model_type": "speech-to-text",
707
  "asr_wer": 0.2610754929736961,
708
+ "asr_chrf": 87.75590287945104,
709
+ "overall_score": 44.00848918621237
710
  }
711
  ],
712
  "commonvoice_hours": 1052.0,
 
781
  "cls_acc": 0.5666666666666667,
782
  "mlm_chrf": 97.12318847922649,
783
  "asr_wer": 0.2610754929736961,
784
+ "asr_chrf": 87.75590287945104,
785
+ "overall_score": 22.35355367824735
786
  },
787
  {
788
  "language_name": "Bangla",
 
802
  "model": "elevenlabs/scribe_v1",
803
  "model_type": "speech-to-text",
804
  "asr_wer": 0.26686188207927336,
805
+ "asr_chrf": 85.49430743996201,
806
+ "overall_score": 42.88058466102064
807
  }
808
  ],
809
  "commonvoice_hours": 49.0,
 
821
  "cls_acc": 0.4,
822
  "mlm_chrf": 90.6067262108039,
823
  "asr_wer": 0.26686188207927336,
824
+ "asr_chrf": 85.49430743996201,
825
+ "overall_score": 21.743791526130206
826
  },
827
  {
828
  "language_name": "Portuguese",
 
842
  "model": "elevenlabs/scribe_v1",
843
  "model_type": "speech-to-text",
844
  "asr_wer": 0.22967756370402836,
845
+ "asr_chrf": 89.03888828875101,
846
+ "overall_score": 44.63428292622752
847
  }
848
  ],
849
  "commonvoice_hours": 177.0,
 
872
  "cls_acc": 0.5666666666666667,
873
  "mlm_chrf": 96.52676764996336,
874
  "asr_wer": 0.22967756370402836,
875
+ "asr_chrf": 89.03888828875101,
876
+ "overall_score": 22.66346738477871
877
  },
878
  {
879
  "language_name": "Punjabi",
 
893
  "model": "elevenlabs/scribe_v1",
894
  "model_type": "speech-to-text",
895
  "asr_wer": 0.20953788908863977,
896
+ "asr_chrf": 84.56957135963033,
897
+ "overall_score": 42.389554624359484
898
  }
899
  ],
900
  "commonvoice_hours": 2.3,
 
913
  "cls_acc": 0.5333333333333333,
914
  "mlm_chrf": 90.10119297923285,
915
  "asr_wer": 0.20953788908863977,
916
+ "asr_chrf": 84.56957135963033,
917
+ "overall_score": 21.518011001013512
918
  }
919
  ]