Spaces:
Running
Running
Update Space (evaluate main: 077df0db)
Browse files- requirements.txt +1 -1
- xtreme_s.py +49 -22
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
git+https://github.com/huggingface/evaluate@
|
2 |
scikit-learn
|
|
|
1 |
+
git+https://github.com/huggingface/evaluate@077df0db12e9e5f85a8b3d4b49547117a1c623a3
|
2 |
scikit-learn
|
xtreme_s.py
CHANGED
@@ -91,13 +91,14 @@ _CONFIG_NAMES = ["fleurs-asr", "mls", "voxpopuli", "babel", "covost2", "fleurs-l
|
|
91 |
SENTENCE_DELIMITER = ""
|
92 |
|
93 |
try:
|
94 |
-
|
95 |
|
96 |
_jiwer_available = True
|
97 |
except ImportError:
|
98 |
_jiwer_available = False
|
99 |
|
100 |
if _jiwer_available and version.parse(importlib_metadata.version("jiwer")) < version.parse("2.3.0"):
|
|
|
101 |
|
102 |
class SentencesToListOfCharacters(tr.AbstractTransform):
|
103 |
def __init__(self, sentence_delimiter: str = " "):
|
@@ -117,7 +118,9 @@ if _jiwer_available and version.parse(importlib_metadata.version("jiwer")) < ver
|
|
117 |
cer_transform = tr.Compose(
|
118 |
[tr.RemoveMultipleSpaces(), tr.Strip(), SentencesToListOfCharacters(SENTENCE_DELIMITER)]
|
119 |
)
|
120 |
-
elif _jiwer_available:
|
|
|
|
|
121 |
cer_transform = tr.Compose(
|
122 |
[
|
123 |
tr.RemoveMultipleSpaces(),
|
@@ -187,35 +190,59 @@ def bleu(
|
|
187 |
|
188 |
def wer_and_cer(preds, labels, concatenate_texts, config_name):
|
189 |
try:
|
190 |
-
|
191 |
except ImportError:
|
192 |
raise ValueError(
|
193 |
f"jiwer has to be installed in order to apply the wer metric for {config_name}."
|
194 |
"You can install it via `pip install jiwer`."
|
195 |
)
|
196 |
|
197 |
-
if
|
198 |
-
|
|
|
199 |
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
incorrect
|
215 |
-
|
216 |
-
return
|
217 |
-
|
218 |
-
return {"wer": compute_score(preds, labels, "wer"), "cer": compute_score(preds, labels, "cer")}
|
219 |
|
220 |
|
221 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
|
|
91 |
SENTENCE_DELIMITER = ""
|
92 |
|
93 |
try:
|
94 |
+
import jiwer
|
95 |
|
96 |
_jiwer_available = True
|
97 |
except ImportError:
|
98 |
_jiwer_available = False
|
99 |
|
100 |
if _jiwer_available and version.parse(importlib_metadata.version("jiwer")) < version.parse("2.3.0"):
|
101 |
+
from jiwer import transforms as tr
|
102 |
|
103 |
class SentencesToListOfCharacters(tr.AbstractTransform):
|
104 |
def __init__(self, sentence_delimiter: str = " "):
|
|
|
118 |
cer_transform = tr.Compose(
|
119 |
[tr.RemoveMultipleSpaces(), tr.Strip(), SentencesToListOfCharacters(SENTENCE_DELIMITER)]
|
120 |
)
|
121 |
+
elif _jiwer_available and hasattr(jiwer, "compute_measures"):
|
122 |
+
from jiwer import transforms as tr
|
123 |
+
|
124 |
cer_transform = tr.Compose(
|
125 |
[
|
126 |
tr.RemoveMultipleSpaces(),
|
|
|
190 |
|
191 |
def wer_and_cer(preds, labels, concatenate_texts, config_name):
    """Compute word error rate (WER) and character error rate (CER) with jiwer.

    Two jiwer APIs are supported, selected by whether the installed module
    exposes ``compute_measures``:

    * with ``compute_measures``: results are dicts, and character-level
      measures are obtained by passing the module-level ``cer_transform`` as
      both the truth and the hypothesis transform;
    * without it: ``process_words`` / ``process_characters`` are used and
      results are read as attributes.

    Args:
        preds: Predictions, handed to jiwer as the hypothesis side.
        labels: References, handed to jiwer as the truth/reference side.
        concatenate_texts: If truthy, ``labels`` and ``preds`` are passed to
            jiwer in a single call and the rate jiwer reports is returned.
            Otherwise each (prediction, reference) pair is scored separately
            and the error counts are pooled before dividing.
        config_name: Only used to build the error message when jiwer is
            missing.

    Returns:
        A dict ``{"wer": ..., "cer": ...}``.

    Raises:
        ValueError: If ``jiwer`` cannot be imported.
        ZeroDivisionError: In the pooled (non-concatenated) mode when the
            accumulated reference count is zero, e.g. when there are no pairs.
    """
    try:
        import jiwer
    except ImportError as err:
        # The two literals are concatenated, so the first one must end with a
        # space; chaining keeps the original import failure visible.
        raise ValueError(
            f"jiwer has to be installed in order to apply the wer metric for {config_name}. "
            "You can install it via `pip install jiwer`."
        ) from err

    if hasattr(jiwer, "compute_measures"):
        # API exposing `compute_measures`: results are plain dicts.
        def word_measures(reference, prediction):
            return jiwer.compute_measures(reference, prediction)

        def char_measures(reference, prediction):
            return jiwer.compute_measures(
                reference, prediction, truth_transform=cer_transform, hypothesis_transform=cer_transform
            )

        def read(measures, name):
            return measures[name]

        # The character-level call reuses the word-level routine, so its rate
        # is reported under the "wer" key.
        char_rate_name = "wer"
    else:
        # API without `compute_measures`: results expose attributes.
        word_measures = jiwer.process_words
        char_measures = jiwer.process_characters
        read = getattr
        char_rate_name = "cer"

    if concatenate_texts:
        wer = read(word_measures(labels, preds), "wer")
        cer = read(char_measures(labels, preds), char_rate_name)
        return {"wer": wer, "cer": cer}

    def pooled_error_rate(measure_fn):
        """Pool error counts over all pairs, then divide by reference length."""
        incorrect = 0
        total = 0
        for prediction, reference in zip(preds, labels):
            measures = measure_fn(reference, prediction)
            substitutions = read(measures, "substitutions")
            deletions = read(measures, "deletions")
            incorrect += substitutions + deletions + read(measures, "insertions")
            total += substitutions + deletions + read(measures, "hits")
        return incorrect / total

    return {"wer": pooled_error_rate(word_measures), "cer": pooled_error_rate(char_measures)}
|
|
|
|
|
246 |
|
247 |
|
248 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|