NbAiLab
/

wav2vec2-1b-npsc-nst

Automatic Speech Recognition

Generated from Trainer

Model card Files and versions Community

versae committed on Jan 31, 2023

Commit

af15a73

·

1 Parent(s): 6e3e8e8

Update eval.py

Files changed (1) hide show

eval.py +4 -3

eval.py CHANGED Viewed

@@ -130,10 +130,11 @@ def normalize_text(original_text: str, dataset: str) -> str:
         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('[«»]', '', text)
         text = re.sub('\s+', ' ', text)
-    text = re.sub('<ee>', 'eee', text)
-    text = re.sub('<qq>', 'qqq', text)
-    text = re.sub('<mm>', 'mmm', text)
     text = re.sub('<inaudible>', 'xxx', text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!

         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('[«»]', '', text)
         text = re.sub('\s+', ' ', text)
+    text = re.sub('<e+h?>', 'eee', text)
+    text = re.sub('<m+>', 'mmm', text)
+    text = re.sub('<q+>', 'qqq', text)
     text = re.sub('<inaudible>', 'xxx', text)
+    text = re.sub('[<>]', '', text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!