David Pomerenke
committed on
Commit
·
4973af4
1
Parent(s):
8633921
Add ASR ChrF scores
Browse files- app.py +11 -1
- evals.py +8 -3
- results.json +50 -25
app.py
CHANGED
@@ -64,13 +64,23 @@ METRICS = {
|
|
64 |
"display_name": "Automatic Speech Recognition (WER)",
|
65 |
"field_name": "asr_wer",
|
66 |
"label": "WER",
|
67 |
-
"explanation": """
|
68 |
**Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
|
69 |
It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
|
70 |
transcription into the reference text, divided by the number of words in the reference.
|
71 |
Lower scores indicate better performance, with 0 being perfect transcription.
|
72 |
""",
|
73 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
}
|
75 |
|
76 |
|
|
|
64 |
"display_name": "Automatic Speech Recognition (WER)",
|
65 |
"field_name": "asr_wer",
|
66 |
"label": "WER",
|
67 |
+
"explanation": """
|
68 |
**Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
|
69 |
It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
|
70 |
transcription into the reference text, divided by the number of words in the reference.
|
71 |
Lower scores indicate better performance, with 0 being perfect transcription.
|
72 |
""",
|
73 |
},
|
74 |
+
"asr_chrf": {
|
75 |
+
"display_name": "Automatic Speech Recognition ChrF",
|
76 |
+
"field_name": "asr_chrf",
|
77 |
+
"label": "ChrF",
|
78 |
+
"explanation": """
|
79 |
+
**Automatic Speech Recognition ChrF**: Character n-gram F-score evaluates transcriptions at the character level rather than the word level.
|
80 |
+
This metric is particularly valuable for morphologically rich languages and can better capture partial word matches.
|
81 |
+
Higher scores (0-100) indicate better transcriptions.
|
82 |
+
""",
|
83 |
+
},
|
84 |
}
|
85 |
|
86 |
|
evals.py
CHANGED
@@ -421,11 +421,13 @@ async def transcribe_and_evaluate(model, language_bcp_47, nr):
|
|
421 |
item = fleurs.iloc[nr]
|
422 |
path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
|
423 |
pred = await transcribe(path, model=model)
|
424 |
-
|
|
|
425 |
return {
|
426 |
"model": model,
|
427 |
"bcp_47": language["bcp_47"],
|
428 |
-
"asr_wer":
|
|
|
429 |
"sentence_nr": nr,
|
430 |
}
|
431 |
|
@@ -532,12 +534,14 @@ async def main():
|
|
532 |
if not scores_asr:
|
533 |
continue
|
534 |
asr_wer = mean([s["asr_wer"] for s in scores_asr])
|
|
|
535 |
results.append(
|
536 |
{
|
537 |
"model": model,
|
538 |
"model_type": "speech-to-text",
|
539 |
"asr_wer": asr_wer,
|
540 |
-
"
|
|
|
541 |
}
|
542 |
)
|
543 |
if results:
|
@@ -561,6 +565,7 @@ async def main():
|
|
561 |
"cls_acc",
|
562 |
"mlm_chrf",
|
563 |
"asr_wer",
|
|
|
564 |
"overall_score",
|
565 |
]:
|
566 |
language_results[score] = mean(
|
|
|
421 |
item = fleurs.iloc[nr]
|
422 |
path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
|
423 |
pred = await transcribe(path, model=model)
|
424 |
+
wer_score = wer.compute(predictions=[pred], references=[item.transcription])
|
425 |
+
chrf_score = chrf.compute(predictions=[pred], references=[item.transcription])
|
426 |
return {
|
427 |
"model": model,
|
428 |
"bcp_47": language["bcp_47"],
|
429 |
+
"asr_wer": wer_score,
|
430 |
+
"asr_chrf": chrf_score["score"],
|
431 |
"sentence_nr": nr,
|
432 |
}
|
433 |
|
|
|
534 |
if not scores_asr:
|
535 |
continue
|
536 |
asr_wer = mean([s["asr_wer"] for s in scores_asr])
|
537 |
+
asr_chrf = mean([s["asr_chrf"] for s in scores_asr])
|
538 |
results.append(
|
539 |
{
|
540 |
"model": model,
|
541 |
"model_type": "speech-to-text",
|
542 |
"asr_wer": asr_wer,
|
543 |
+
"asr_chrf": asr_chrf,
|
544 |
+
"overall_score": (asr_wer + asr_chrf) / 2,
|
545 |
}
|
546 |
)
|
547 |
if results:
|
|
|
565 |
"cls_acc",
|
566 |
"mlm_chrf",
|
567 |
"asr_wer",
|
568 |
+
"asr_chrf",
|
569 |
"overall_score",
|
570 |
]:
|
571 |
language_results[score] = mean(
|
results.json
CHANGED
@@ -53,13 +53,15 @@
|
|
53 |
"model": "elevenlabs/scribe_v1",
|
54 |
"model_type": "speech-to-text",
|
55 |
"asr_wer": 0.34916319968417603,
|
56 |
-
"
|
|
|
57 |
},
|
58 |
{
|
59 |
"model": "openai/whisper-large-v3",
|
60 |
"model_type": "speech-to-text",
|
61 |
"asr_wer": 0.25418986127300397,
|
62 |
-
"
|
|
|
63 |
}
|
64 |
],
|
65 |
"commonvoice_hours": 2651.0,
|
@@ -227,7 +229,8 @@
|
|
227 |
"cls_acc": 0.6533333333333333,
|
228 |
"mlm_chrf": 93.48244773503015,
|
229 |
"asr_wer": 0.30167653047859,
|
230 |
-
"
|
|
|
231 |
},
|
232 |
{
|
233 |
"language_name": "Chinese",
|
@@ -283,13 +286,15 @@
|
|
283 |
"model": "elevenlabs/scribe_v1",
|
284 |
"model_type": "speech-to-text",
|
285 |
"asr_wer": 1.0,
|
286 |
-
"
|
|
|
287 |
},
|
288 |
{
|
289 |
"model": "openai/whisper-large-v3",
|
290 |
"model_type": "speech-to-text",
|
291 |
"asr_wer": 1.0,
|
292 |
-
"
|
|
|
293 |
}
|
294 |
],
|
295 |
"commonvoice_hours": 422.0,
|
@@ -322,7 +327,8 @@
|
|
322 |
"cls_acc": 0.6666666666666666,
|
323 |
"mlm_chrf": 93.14626958884273,
|
324 |
"asr_wer": 1.0,
|
325 |
-
"
|
|
|
326 |
},
|
327 |
{
|
328 |
"language_name": "Hindi",
|
@@ -378,13 +384,15 @@
|
|
378 |
"model": "elevenlabs/scribe_v1",
|
379 |
"model_type": "speech-to-text",
|
380 |
"asr_wer": 0.2338948365728121,
|
381 |
-
"
|
|
|
382 |
},
|
383 |
{
|
384 |
"model": "openai/whisper-large-v3",
|
385 |
"model_type": "speech-to-text",
|
386 |
"asr_wer": 0.43522263872986894,
|
387 |
-
"
|
|
|
388 |
}
|
389 |
],
|
390 |
"commonvoice_hours": 16.0,
|
@@ -403,7 +411,8 @@
|
|
403 |
"cls_acc": 0.6333333333333334,
|
404 |
"mlm_chrf": 94.16368134606655,
|
405 |
"asr_wer": 0.33455873765134053,
|
406 |
-
"
|
|
|
407 |
},
|
408 |
{
|
409 |
"language_name": "Spanish",
|
@@ -459,13 +468,15 @@
|
|
459 |
"model": "elevenlabs/scribe_v1",
|
460 |
"model_type": "speech-to-text",
|
461 |
"asr_wer": 0.19653905528613333,
|
462 |
-
"
|
|
|
463 |
},
|
464 |
{
|
465 |
"model": "openai/whisper-large-v3",
|
466 |
"model_type": "speech-to-text",
|
467 |
"asr_wer": 0.17561491933862197,
|
468 |
-
"
|
|
|
469 |
}
|
470 |
],
|
471 |
"commonvoice_hours": 446.0,
|
@@ -517,7 +528,8 @@
|
|
517 |
"cls_acc": 0.6733333333333333,
|
518 |
"mlm_chrf": 95.99334001231053,
|
519 |
"asr_wer": 0.18607698731237765,
|
520 |
-
"
|
|
|
521 |
},
|
522 |
{
|
523 |
"language_name": "Arabic",
|
@@ -573,13 +585,15 @@
|
|
573 |
"model": "elevenlabs/scribe_v1",
|
574 |
"model_type": "speech-to-text",
|
575 |
"asr_wer": 0.2685436379713873,
|
576 |
-
"
|
|
|
577 |
},
|
578 |
{
|
579 |
"model": "openai/whisper-large-v3",
|
580 |
"model_type": "speech-to-text",
|
581 |
"asr_wer": 0.17370718156523782,
|
582 |
-
"
|
|
|
583 |
}
|
584 |
],
|
585 |
"commonvoice_hours": 91.0,
|
@@ -630,7 +644,8 @@
|
|
630 |
"cls_acc": 0.6733333333333332,
|
631 |
"mlm_chrf": 94.58207181664275,
|
632 |
"asr_wer": 0.22112540976831258,
|
633 |
-
"
|
|
|
634 |
},
|
635 |
{
|
636 |
"language_name": "Urdu",
|
@@ -650,7 +665,8 @@
|
|
650 |
"model": "elevenlabs/scribe_v1",
|
651 |
"model_type": "speech-to-text",
|
652 |
"asr_wer": 0.2982973325975355,
|
653 |
-
"
|
|
|
654 |
}
|
655 |
],
|
656 |
"commonvoice_hours": 77.0,
|
@@ -668,7 +684,8 @@
|
|
668 |
"cls_acc": 0.43333333333333335,
|
669 |
"mlm_chrf": 94.38802161979918,
|
670 |
"asr_wer": 0.2982973325975355,
|
671 |
-
"
|
|
|
672 |
},
|
673 |
{
|
674 |
"language_name": "French",
|
@@ -688,7 +705,8 @@
|
|
688 |
"model": "elevenlabs/scribe_v1",
|
689 |
"model_type": "speech-to-text",
|
690 |
"asr_wer": 0.2610754929736961,
|
691 |
-
"
|
|
|
692 |
}
|
693 |
],
|
694 |
"commonvoice_hours": 1052.0,
|
@@ -763,7 +781,8 @@
|
|
763 |
"cls_acc": 0.5666666666666667,
|
764 |
"mlm_chrf": 97.12318847922649,
|
765 |
"asr_wer": 0.2610754929736961,
|
766 |
-
"
|
|
|
767 |
},
|
768 |
{
|
769 |
"language_name": "Bangla",
|
@@ -783,7 +802,8 @@
|
|
783 |
"model": "elevenlabs/scribe_v1",
|
784 |
"model_type": "speech-to-text",
|
785 |
"asr_wer": 0.26686188207927336,
|
786 |
-
"
|
|
|
787 |
}
|
788 |
],
|
789 |
"commonvoice_hours": 49.0,
|
@@ -801,7 +821,8 @@
|
|
801 |
"cls_acc": 0.4,
|
802 |
"mlm_chrf": 90.6067262108039,
|
803 |
"asr_wer": 0.26686188207927336,
|
804 |
-
"
|
|
|
805 |
},
|
806 |
{
|
807 |
"language_name": "Portuguese",
|
@@ -821,7 +842,8 @@
|
|
821 |
"model": "elevenlabs/scribe_v1",
|
822 |
"model_type": "speech-to-text",
|
823 |
"asr_wer": 0.22967756370402836,
|
824 |
-
"
|
|
|
825 |
}
|
826 |
],
|
827 |
"commonvoice_hours": 177.0,
|
@@ -850,7 +872,8 @@
|
|
850 |
"cls_acc": 0.5666666666666667,
|
851 |
"mlm_chrf": 96.52676764996336,
|
852 |
"asr_wer": 0.22967756370402836,
|
853 |
-
"
|
|
|
854 |
},
|
855 |
{
|
856 |
"language_name": "Punjabi",
|
@@ -870,7 +893,8 @@
|
|
870 |
"model": "elevenlabs/scribe_v1",
|
871 |
"model_type": "speech-to-text",
|
872 |
"asr_wer": 0.20953788908863977,
|
873 |
-
"
|
|
|
874 |
}
|
875 |
],
|
876 |
"commonvoice_hours": 2.3,
|
@@ -889,6 +913,7 @@
|
|
889 |
"cls_acc": 0.5333333333333333,
|
890 |
"mlm_chrf": 90.10119297923285,
|
891 |
"asr_wer": 0.20953788908863977,
|
892 |
-
"
|
|
|
893 |
}
|
894 |
]
|
|
|
53 |
"model": "elevenlabs/scribe_v1",
|
54 |
"model_type": "speech-to-text",
|
55 |
"asr_wer": 0.34916319968417603,
|
56 |
+
"asr_chrf": 78.55986690446153,
|
57 |
+
"overall_score": 39.45451505207285
|
58 |
},
|
59 |
{
|
60 |
"model": "openai/whisper-large-v3",
|
61 |
"model_type": "speech-to-text",
|
62 |
"asr_wer": 0.25418986127300397,
|
63 |
+
"asr_chrf": 86.52016887049808,
|
64 |
+
"overall_score": 43.387179365885544
|
65 |
}
|
66 |
],
|
67 |
"commonvoice_hours": 2651.0,
|
|
|
229 |
"cls_acc": 0.6533333333333333,
|
230 |
"mlm_chrf": 93.48244773503015,
|
231 |
"asr_wer": 0.30167653047859,
|
232 |
+
"asr_chrf": 82.5400178874798,
|
233 |
+
"overall_score": 12.369611012726589
|
234 |
},
|
235 |
{
|
236 |
"language_name": "Chinese",
|
|
|
286 |
"model": "elevenlabs/scribe_v1",
|
287 |
"model_type": "speech-to-text",
|
288 |
"asr_wer": 1.0,
|
289 |
+
"asr_chrf": 70.77419107011707,
|
290 |
+
"overall_score": 35.887095535058535
|
291 |
},
|
292 |
{
|
293 |
"model": "openai/whisper-large-v3",
|
294 |
"model_type": "speech-to-text",
|
295 |
"asr_wer": 1.0,
|
296 |
+
"asr_chrf": 77.48220275963784,
|
297 |
+
"overall_score": 39.24110137981892
|
298 |
}
|
299 |
],
|
300 |
"commonvoice_hours": 422.0,
|
|
|
327 |
"cls_acc": 0.6666666666666666,
|
328 |
"mlm_chrf": 93.14626958884273,
|
329 |
"asr_wer": 1.0,
|
330 |
+
"asr_chrf": 74.12819691487746,
|
331 |
+
"overall_score": 11.270528769615078
|
332 |
},
|
333 |
{
|
334 |
"language_name": "Hindi",
|
|
|
384 |
"model": "elevenlabs/scribe_v1",
|
385 |
"model_type": "speech-to-text",
|
386 |
"asr_wer": 0.2338948365728121,
|
387 |
+
"asr_chrf": 83.13389660250954,
|
388 |
+
"overall_score": 41.68389571954118
|
389 |
},
|
390 |
{
|
391 |
"model": "openai/whisper-large-v3",
|
392 |
"model_type": "speech-to-text",
|
393 |
"asr_wer": 0.43522263872986894,
|
394 |
+
"asr_chrf": 63.049286642268754,
|
395 |
+
"overall_score": 31.74225464049931
|
396 |
}
|
397 |
],
|
398 |
"commonvoice_hours": 16.0,
|
|
|
411 |
"cls_acc": 0.6333333333333334,
|
412 |
"mlm_chrf": 94.16368134606655,
|
413 |
"asr_wer": 0.33455873765134053,
|
414 |
+
"asr_chrf": 73.09159162238915,
|
415 |
+
"overall_score": 10.997417339445926
|
416 |
},
|
417 |
{
|
418 |
"language_name": "Spanish",
|
|
|
468 |
"model": "elevenlabs/scribe_v1",
|
469 |
"model_type": "speech-to-text",
|
470 |
"asr_wer": 0.19653905528613333,
|
471 |
+
"asr_chrf": 89.3473231669277,
|
472 |
+
"overall_score": 44.771931111106916
|
473 |
},
|
474 |
{
|
475 |
"model": "openai/whisper-large-v3",
|
476 |
"model_type": "speech-to-text",
|
477 |
"asr_wer": 0.17561491933862197,
|
478 |
+
"asr_chrf": 91.85915393819565,
|
479 |
+
"overall_score": 46.01738442876714
|
480 |
}
|
481 |
],
|
482 |
"commonvoice_hours": 446.0,
|
|
|
528 |
"cls_acc": 0.6733333333333333,
|
529 |
"mlm_chrf": 95.99334001231053,
|
530 |
"asr_wer": 0.18607698731237765,
|
531 |
+
"asr_chrf": 90.60323855256166,
|
532 |
+
"overall_score": 13.481886731875155
|
533 |
},
|
534 |
{
|
535 |
"language_name": "Arabic",
|
|
|
585 |
"model": "elevenlabs/scribe_v1",
|
586 |
"model_type": "speech-to-text",
|
587 |
"asr_wer": 0.2685436379713873,
|
588 |
+
"asr_chrf": 83.61712973768607,
|
589 |
+
"overall_score": 41.94283668782873
|
590 |
},
|
591 |
{
|
592 |
"model": "openai/whisper-large-v3",
|
593 |
"model_type": "speech-to-text",
|
594 |
"asr_wer": 0.17370718156523782,
|
595 |
+
"asr_chrf": 88.68973505122798,
|
596 |
+
"overall_score": 44.43172111639661
|
597 |
}
|
598 |
],
|
599 |
"commonvoice_hours": 91.0,
|
|
|
644 |
"cls_acc": 0.6733333333333332,
|
645 |
"mlm_chrf": 94.58207181664275,
|
646 |
"asr_wer": 0.22112540976831258,
|
647 |
+
"asr_chrf": 86.15343239445703,
|
648 |
+
"overall_score": 12.84781397121921
|
649 |
},
|
650 |
{
|
651 |
"language_name": "Urdu",
|
|
|
665 |
"model": "elevenlabs/scribe_v1",
|
666 |
"model_type": "speech-to-text",
|
667 |
"asr_wer": 0.2982973325975355,
|
668 |
+
"asr_chrf": 78.04754030614318,
|
669 |
+
"overall_score": 39.17291881937036
|
670 |
}
|
671 |
],
|
672 |
"commonvoice_hours": 77.0,
|
|
|
684 |
"cls_acc": 0.43333333333333335,
|
685 |
"mlm_chrf": 94.38802161979918,
|
686 |
"asr_wer": 0.2982973325975355,
|
687 |
+
"asr_chrf": 78.04754030614318,
|
688 |
+
"overall_score": 19.894947620456534
|
689 |
},
|
690 |
{
|
691 |
"language_name": "French",
|
|
|
705 |
"model": "elevenlabs/scribe_v1",
|
706 |
"model_type": "speech-to-text",
|
707 |
"asr_wer": 0.2610754929736961,
|
708 |
+
"asr_chrf": 87.75590287945104,
|
709 |
+
"overall_score": 44.00848918621237
|
710 |
}
|
711 |
],
|
712 |
"commonvoice_hours": 1052.0,
|
|
|
781 |
"cls_acc": 0.5666666666666667,
|
782 |
"mlm_chrf": 97.12318847922649,
|
783 |
"asr_wer": 0.2610754929736961,
|
784 |
+
"asr_chrf": 87.75590287945104,
|
785 |
+
"overall_score": 22.35355367824735
|
786 |
},
|
787 |
{
|
788 |
"language_name": "Bangla",
|
|
|
802 |
"model": "elevenlabs/scribe_v1",
|
803 |
"model_type": "speech-to-text",
|
804 |
"asr_wer": 0.26686188207927336,
|
805 |
+
"asr_chrf": 85.49430743996201,
|
806 |
+
"overall_score": 42.88058466102064
|
807 |
}
|
808 |
],
|
809 |
"commonvoice_hours": 49.0,
|
|
|
821 |
"cls_acc": 0.4,
|
822 |
"mlm_chrf": 90.6067262108039,
|
823 |
"asr_wer": 0.26686188207927336,
|
824 |
+
"asr_chrf": 85.49430743996201,
|
825 |
+
"overall_score": 21.743791526130206
|
826 |
},
|
827 |
{
|
828 |
"language_name": "Portuguese",
|
|
|
842 |
"model": "elevenlabs/scribe_v1",
|
843 |
"model_type": "speech-to-text",
|
844 |
"asr_wer": 0.22967756370402836,
|
845 |
+
"asr_chrf": 89.03888828875101,
|
846 |
+
"overall_score": 44.63428292622752
|
847 |
}
|
848 |
],
|
849 |
"commonvoice_hours": 177.0,
|
|
|
872 |
"cls_acc": 0.5666666666666667,
|
873 |
"mlm_chrf": 96.52676764996336,
|
874 |
"asr_wer": 0.22967756370402836,
|
875 |
+
"asr_chrf": 89.03888828875101,
|
876 |
+
"overall_score": 22.66346738477871
|
877 |
},
|
878 |
{
|
879 |
"language_name": "Punjabi",
|
|
|
893 |
"model": "elevenlabs/scribe_v1",
|
894 |
"model_type": "speech-to-text",
|
895 |
"asr_wer": 0.20953788908863977,
|
896 |
+
"asr_chrf": 84.56957135963033,
|
897 |
+
"overall_score": 42.389554624359484
|
898 |
}
|
899 |
],
|
900 |
"commonvoice_hours": 2.3,
|
|
|
913 |
"cls_acc": 0.5333333333333333,
|
914 |
"mlm_chrf": 90.10119297923285,
|
915 |
"asr_wer": 0.20953788908863977,
|
916 |
+
"asr_chrf": 84.56957135963033,
|
917 |
+
"overall_score": 21.518011001013512
|
918 |
}
|
919 |
]
|