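# Streamlit leaderboard space for the ESC benchmark: it clones the reference
# and submission datasets from the Hugging Face Hub, scores uploaded
# predictions with jiwer, and renders the ranked results table.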
import os
import zipfile

import jiwer
import pandas as pd
import streamlit as st
from huggingface_hub import Repository

REFERENCE_NAME = "references"
SUBMISSION_NAME = "submissions"
REFERENCE_URL = f"https://huggingface.co/datasets/esc-bench/{REFERENCE_NAME}"
SUBMISSION_URL = f"https://huggingface.co/datasets/esc-bench/{SUBMISSION_NAME}"
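# Mandatory test sets: a submission must contain one .txt prediction file per
# set, and the ESC score averages WER over exactly these.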
TEST_SETS = [
"librispeech-clean",
"librispeech-other",
"common-voice-9",
"vox-populi",
"ted-lium",
"giga-speech",
"spgi-speech",
"earnings-22",
"ami",
]
EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
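# Optional test sets are not included in the ESC score.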
OPTIONAL_TEST_SETS = ["switch-board", "call-home", "chime-4"]
CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
HF_TOKEN = os.environ.get("HF_TOKEN")
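# HF_TOKEN needs write access to the esc-bench datasets (on a Space it is
# typically configured as a secret); cloning and pushing below fail without it.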
def compute_wer(pred_file, ref_file):
    """Compute the corpus-level word error rate between two transcript files.

    Both files must contain one utterance per line, in the same order.
    """
    with open(pred_file, "r", encoding="utf-8") as pred, open(
        ref_file, "r", encoding="utf-8"
    ) as ref:
        pred_lines = [line.strip() for line in pred.readlines()]
        ref_lines = [line.strip() for line in ref.readlines()]
    wer = jiwer.wer(ref_lines, pred_lines)
    return wer
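# Hypothetical usage (paths are illustrative only):
#   compute_wer("submissions/my-model/ami.txt", "references/ami.txt")
# returns a fraction, e.g. 0.17 for a 17% word error rate.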
reference_repo = Repository(
local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
)
submission_repo = Repository(
local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
)
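# Pull the latest submissions so results evaluated in previous runs are
# present before results.csv is read below.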
submission_repo.git_pull()
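# Every top-level folder in the submissions repo (except .git) is one
# submitted system.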
all_submissions = [
folder
for folder in os.listdir(SUBMISSION_NAME)
if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
]
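# Abbreviated column headers used when rendering the leaderboard table.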
COLUMN_NAMES = {
"librispeech-clean": "ls-clean",
"librispeech-other": "ls-other",
"common-voice-9": "cv9",
"vox-populi": "vox",
"ted-lium": "ted",
"giga-speech": "giga",
"spgi-speech": "spgi",
"earnings-22": "e22",
"ami": "ami",
"chime-4": "chime",
"switch-board": "swbd",
}
all_results = pd.read_csv(CSV_RESULTS_FILE)
# Build the display table from the CSV
table = all_results.copy()
esc_column = table.pop("esc-score")
name_column = table.pop("name")
table.insert(0, "esc-score", esc_column)
# TODO: revert to scaling raw WER by 100 to retrieve % point values
table = table.select_dtypes(exclude=['object', 'string']) # * 100
table.insert(0, "name", name_column)
table = table.round(2)
table = table.rename(columns=COLUMN_NAMES)
# start indexing from 1
table.index = table.index + 1
# Streamlit
st.markdown("# ESC: A Benchmark For Multi-Domain End-to-End Speech Recognition")
st.markdown(
f"""
This is the leaderboard of the End-to end Speech Challenge (ESC).
Submitted systems are ranked by the **ESC Score** which is the average of
all non-optional datasets: {', '.join(COLUMN_NAMES.values())}."""
)
st.dataframe(table)
st.markdown(
    """
ESC was proposed in *ESC: A Benchmark For Multi-Domain End-to-End Speech Recognition* by ...
\n
The abstract of the paper is as follows:
\n
*Speech recognition applications cover a range of different audio and text distributions, with different speaking styles, background noise, transcription punctuation and character casing. However, many speech recognition systems require dataset-specific tuning (audio filtering, punctuation removal and normalisation of casing), therefore assuming a-priori knowledge of both the audio and text distributions. This tuning requirement can lead to systems failing to generalise to other datasets and domains. To promote the development of multi-domain speech systems, we introduce the End-to-end Speech Challenge (ESC) for evaluating the performance of a single automatic speech recognition (ASR) system across a broad set of speech datasets. Benchmarked systems must use the same data pre- and post-processing algorithm across datasets - assuming the audio and text data distributions are a-priori unknown. We compare a series of state-of-the-art (SoTA) end-to-end (E2E) systems on this benchmark, demonstrating how a single speech system can be applied and evaluated on a wide range of data distributions. We find E2E systems to be effective across datasets: in a fair comparison, E2E systems achieve within 2.6% of SoTA systems tuned to a specific dataset. Our analysis reveals that transcription artefacts, such as punctuation and casing, pose difficulties for ASR systems and should be included in evaluation. We believe E2E benchmarking over a range of datasets promotes the research of multi-domain speech recognition systems.*
\n
For more information, please see the official submission on [OpenReview.net](https://openreview.net/forum?id=9OL2fIfDLK).
"""
)
st.markdown("To submit to ESC, please click on the instructions below ↓")
st.markdown("TODO: Add instructions ...")
# Submission form: accepts a zip of prediction files, one .txt per test set.
with st.form(key="my_form"):
    uploaded_file = st.file_uploader("Choose a zip file")
    submit_button = st.form_submit_button(label="Submit")
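# On submit: extract the zip into the submissions repo, push the raw
# predictions, score each test set, and append a row to results.csv.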
if submit_button:
    if uploaded_file is None:
        # Report the problem in the app instead of surfacing a raw traceback.
        st.error("Please make sure to have uploaded a zip file.")
        st.stop()
submission = uploaded_file.name.split(".zip")[0]
with st.spinner(f"Uploading {submission}..."):
with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
zip_ref.extractall(submission_repo.local_dir)
submission_repo.push_to_hub()
with st.spinner(f"Computing ESC Score for {submission}..."):
results = {"name": submission}
submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
submitted_files = [f for f in submitted_files if f in EXPECTED_TEST_FILES]
        if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
            st.error(
                f"{', '.join(submitted_files)} were submitted, but expected {', '.join(EXPECTED_TEST_FILES)}"
            )
            st.stop()
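        # Score each submitted test set against its reference transcripts.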
for file in submitted_files:
ref_file = os.path.join(REFERENCE_NAME, file)
pred_file = os.path.join(SUBMISSION_NAME, submission, file)
wer = compute_wer(pred_file, ref_file)
            results[file.split(".")[0]] = wer
        # The ESC score is the unweighted mean WER over the mandatory test sets.
        wer_values = [results[t] for t in TEST_SETS]
        all_wer = sum(wer_values) / len(wer_values)
        results["esc-score"] = all_wer
        all_results = pd.concat(
            [all_results, pd.DataFrame([results])], ignore_index=True
        )
        # save and upload the newly evaluated results
        all_results.to_csv(CSV_RESULTS_FILE, index=False)
commit_url = submission_repo.push_to_hub()
st.success('Please refresh this space (CTRL+R) to see your result')