NbAiLab
/

wav2vec2-1b-npsc-nst

@@ -90,10 +90,10 @@ def normalize_text(text: str, dataset: str) -> str:
         text = re.sub('[úùüû]', 'u', text)
         text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
         text = re.sub('\s+', ' ', text)
-    text = re.sub("<ee(eh)?>", "e", text)
-    text = re.sub("<mmm?>", "m", text)
-    text = re.sub("<qq>", "q", text)
-    text = re.sub("<inaudible>", "i", text)
     # # In addition, we can normalize the target text, e.g. removing newline characters etc...
     # # note that order is important here!

         text = re.sub('[úùüû]', 'u', text)
         text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
         text = re.sub('\s+', ' ', text)
+    text = re.sub('<ee>', 'eee', text)
+    text = re.sub('<qq>', 'qqq', text)
+    text = re.sub('<mm>', 'mmm', text)
+    text = re.sub('<inaudible>', 'xxx', text)
     # # In addition, we can normalize the target text, e.g. removing newline characters etc...
     # # note that order is important here!