NbAiLab/wav2vec2-1b-npsc-nst

Automatic Speech Recognition

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Community

versae committed on Feb 2, 2023

Commit

a64382f

•

1 Parent(s): af15a73

Update eval.py

Files changed (1) hide show

eval.py +4 -5

eval.py CHANGED Viewed

@@ -130,11 +130,10 @@ def normalize_text(original_text: str, dataset: str) -> str:
         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('[«»]', '', text)
         text = re.sub('\s+', ' ', text)
-    text = re.sub('<e+h?>', 'eee', text)
-    text = re.sub('<m+>', 'mmm', text)
-    text = re.sub('<q+>', 'qqq', text)
     text = re.sub('<inaudible>', 'xxx', text)
-    text = re.sub('[<>]', '', text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!
@@ -143,7 +142,7 @@ def normalize_text(original_text: str, dataset: str) -> str:
     # for t in token_sequences_to_ignore:
     #     text = " ".join(text.split(t))
-    return text.strip()
 def main(args):

         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('[«»]', '', text)
         text = re.sub('\s+', ' ', text)
+    text = re.sub('<ee>', 'eee', text)
+    text = re.sub('<qq>', 'qqq', text)
+    text = re.sub('<mm>', 'mmm', text)
     text = re.sub('<inaudible>', 'xxx', text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!
     # for t in token_sequences_to_ignore:
     #     text = " ".join(text.split(t))
+    return text
 def main(args):