Spaces:
Sleeping
Sleeping
""" from https://github.com/keithito/tacotron """ | |
'''
Cleaners are transformations that run over the input text at both training and eval time.
'''
import re | |
from unidecode import unidecode | |
from phonemizer import phonemize | |
# Matches a run of one or more whitespace characters.
_whitespace_re = re.compile(r'\s+')
def lowercase(text):
    '''Return ``text`` converted to lower case with ``str.lower``.'''
    lowered = text.lower()
    return lowered
def collapse_whitespace(text):
    '''Replace every match of ``_whitespace_re`` in ``text`` with one space.'''
    single_space = ' '
    # Calling .sub on the compiled pattern is the same operation as
    # re.sub(pattern, repl, text).
    return _whitespace_re.sub(single_space, text)
def replace_quote(text):
    '''Turn each typographic apostrophe (’) in ``text`` into a plain ASCII one.'''
    typographic_apostrophe = '’'
    cleaned = text.replace(typographic_apostrophe, "'")
    return cleaned
def remove_special_characters(text):
    '''Delete guillemets, en dashes, square/curly brackets and pipes.

    Every occurrence of the characters listed in ``unwanted`` is removed from
    ``text``; all other characters are returned unchanged and in order.
    '''
    unwanted = ('«', '»', '–', '[', ']', '{', '}', '|')
    # Mapping a character to None in a translation table deletes it, so a
    # single translate() pass drops all of them at once.
    deletion_table = str.maketrans(dict.fromkeys(unwanted))
    return text.translate(deletion_table)
def remove_hyphen_at_start(text):
    '''Drop one leading '-' together with the whitespace that follows it.

    Text that does not begin with '-' is returned unchanged. Only the first
    hyphen is removed; a second one directly after it is kept.
    '''
    return text[1:].lstrip() if text.startswith('-') else text
def basic_cleaners(text):
    '''Lowercase ``text``, apply the spelling rewrites below, then tidy it up.

    Steps, in order:
      1. lowercase the input with ``lowercase``;
      2. apply every ``(old, new)`` pair of ``rewrites`` with ``str.replace``,
         one after another;
      3. run ``collapse_whitespace``, ``replace_quote``,
         ``remove_special_characters`` and ``remove_hyphen_at_start``.

    No transliteration to ASCII is performed.

    Args:
        text: the raw input string.

    Returns:
        The cleaned string.
    '''
    text = lowercase(text)

    # NOTE(review): the two literals of the first rule render identically in
    # the original source; it is assumed to map the decomposed spelling
    # ('a' + U+030A combining ring above) to the precomposed letter U+00E5,
    # and that the 'au' rule emits the same precomposed letter - TODO confirm.
    a_ring = '\u00e5'

    # Order matters: each rule sees the output of the rules before it
    # (for example 'gngn' -> 'djn' runs before 'djdj' -> 'dj').
    # Rules for 'Qu' and 'GNGN' used to sit after their lowercase twins; they
    # could never match already-lowercased text and have been dropped.
    rewrites = (
        ('a\u030a', a_ring),
        ('´', "'"),    # acute accent typed in place of an apostrophe
        ('à', 'a'),
        ('qu', 'K'),   # upper-case 'K' is what the original rule emits
        ('gngn', 'djn'),
        ('djdj', 'dj'),
        ('qw', 'kw'),
        ('emb', 'anb'),
        ('emp', 'anp'),
        ('eû', 'eu'),
        ('au', a_ring),
        ('t′', 'te'),  # 't' followed by a prime mark
        ('s′', 'sse'),  # 's' followed by a prime mark
        ('aî', 'ai'),
    )
    for old, new in rewrites:
        text = text.replace(old, new)

    # NOTE(review): special characters are removed after whitespace has been
    # collapsed, so input such as 'a – b' still ends up with two spaces.
    # The order is kept as-is so the cleaned text does not change.
    text = collapse_whitespace(text)
    text = replace_quote(text)
    text = remove_special_characters(text)
    return remove_hyphen_at_start(text)