versae committed on
Commit
a1e8de0
1 Parent(s): df9222e

Fix eval with proper hesitation regexes

Browse files
Files changed (1) hide show
  1. eval.py +4 -4
eval.py CHANGED
@@ -90,10 +90,10 @@ def normalize_text(text: str, dataset: str) -> str:
90
  text = re.sub('[úùüû]', 'u', text)
91
  text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
92
  text = re.sub('\s+', ' ', text)
93
- text = re.sub("<ee(eh)?>", "e", text)
94
- text = re.sub("<mmm?>", "m", text)
95
- text = re.sub("<qq>", "q", text)
96
- text = re.sub("<inaudible>", "i", text)
97
 
98
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
99
  # # note that order is important here!
 
90
  text = re.sub('[úùüû]', 'u', text)
91
  text = re.compile(r"-?[1-9][\d.]*").sub(lambda x: n2w(x.group(0), lang="no"), text)
92
  text = re.sub('\s+', ' ', text)
93
+ text = re.sub('<ee>', 'eee', text)
94
+ text = re.sub('<qq>', 'qqq', text)
95
+ text = re.sub('<mm>', 'mmm', text)
96
+ text = re.sub('<inaudible>', 'xxx', text)
97
 
98
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
99
  # # note that order is important here!