Update eval.py
Browse files
eval.py
CHANGED
@@ -130,11 +130,10 @@ def normalize_text(original_text: str, dataset: str) -> str:
|
|
130 |
text = re.sub('[úùüû]', 'u', text)
|
131 |
text = re.sub('[«»]', '', text)
|
132 |
text = re.sub('\s+', ' ', text)
|
133 |
-
text = re.sub('<
|
134 |
-
text = re.sub('<
|
135 |
-
text = re.sub('<
|
136 |
text = re.sub('<inaudible>', 'xxx', text)
|
137 |
-
text = re.sub('[<>]', '', text)
|
138 |
|
139 |
# # In addition, we can normalize the target text, e.g. removing new lines characters etc...
|
140 |
# # note that order is important here!
|
@@ -143,7 +142,7 @@ def normalize_text(original_text: str, dataset: str) -> str:
|
|
143 |
# for t in token_sequences_to_ignore:
|
144 |
# text = " ".join(text.split(t))
|
145 |
|
146 |
-
return text
|
147 |
|
148 |
|
149 |
def main(args):
|
|
|
130 |
text = re.sub('[úùüû]', 'u', text)
|
131 |
text = re.sub('[«»]', '', text)
|
132 |
text = re.sub('\s+', ' ', text)
|
133 |
+
text = re.sub('<ee>', 'eee', text)
|
134 |
+
text = re.sub('<qq>', 'qqq', text)
|
135 |
+
text = re.sub('<mm>', 'mmm', text)
|
136 |
text = re.sub('<inaudible>', 'xxx', text)
|
|
|
137 |
|
138 |
# # In addition, we can normalize the target text, e.g. removing new lines characters etc...
|
139 |
# # note that order is important here!
|
|
|
142 |
# for t in token_sequences_to_ignore:
|
143 |
# text = " ".join(text.split(t))
|
144 |
|
145 |
+
return text
|
146 |
|
147 |
|
148 |
def main(args):
|