Update eval.py
Browse files
eval.py
CHANGED
@@ -130,10 +130,11 @@ def normalize_text(original_text: str, dataset: str) -> str:
|
|
130 |
text = re.sub('[úùüû]', 'u', text)
|
131 |
text = re.sub('[«»]', '', text)
|
132 |
text = re.sub('\s+', ' ', text)
|
133 |
-
text = re.sub('<
|
134 |
-
text = re.sub('<
|
135 |
-
text = re.sub('<
|
136 |
text = re.sub('<inaudible>', 'xxx', text)
|
|
|
137 |
|
138 |
# # In addition, we can normalize the target text, e.g. removing newline characters etc...
|
139 |
# # note that order is important here!
|
|
|
130 |
text = re.sub('[úùüû]', 'u', text)
|
131 |
text = re.sub('[«»]', '', text)
|
132 |
text = re.sub('\s+', ' ', text)
|
133 |
+
text = re.sub('<e+h?>', 'eee', text)
|
134 |
+
text = re.sub('<m+>', 'mmm', text)
|
135 |
+
text = re.sub('<q+>', 'qqq', text)
|
136 |
text = re.sub('<inaudible>', 'xxx', text)
|
137 |
+
text = re.sub('[<>]', '', text)
|
138 |
|
139 |
# # In addition, we can normalize the target text, e.g. removing newline characters etc...
|
140 |
# # note that order is important here!
|