Fix eval with proper hesitation regexes
Browse files
eval.py
CHANGED
@@ -90,10 +90,10 @@ def normalize_text(text: str, dataset: str) -> str:
|
|
90 |
text = re.sub('[úùüû]', 'u', text)
|
91 |
text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
|
92 |
text = re.sub('\s+', ' ', text)
|
93 |
-
text = re.sub(
|
94 |
-
text = re.sub(
|
95 |
-
text = re.sub(
|
96 |
-
text = re.sub(
|
97 |
|
98 |
# # In addition, we can normalize the target text, e.g. by removing newline characters, etc.
|
99 |
# # note that order is important here!
|
|
|
90 |
text = re.sub('[úùüû]', 'u', text)
|
91 |
text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
|
92 |
text = re.sub('\s+', ' ', text)
|
93 |
+
text = re.sub('<ee>', 'eee', text)
|
94 |
+
text = re.sub('<qq>', 'qqq', text)
|
95 |
+
text = re.sub('<mm>', 'mmm', text)
|
96 |
+
text = re.sub('<inaudible>', 'xxx', text)
|
97 |
|
98 |
# # In addition, we can normalize the target text, e.g. by removing newline characters, etc.
|
99 |
# # note that order is important here!
|