Spaces:

Pipe1213
/

VITS_Walloon_Graphemes

Sleeping

App Files Files Community

VITS_Walloon_Graphemes / text /cleaners.py

Pipe1213

Update text/cleaners.py

4e392aa verified 10 months ago

raw

history blame contribute delete

1.77 kB

	""" from https://github.com/keithito/tacotron """

	'''
	Cleaners are transformations that run over the input text at both training and eval time.

	'''

	import re
	from unidecode import unidecode
	from phonemizer import phonemize

	# Pre-compiled pattern matching any run of one or more whitespace characters.
	_whitespace_re = re.compile(r'\s+')

	def lowercase(text):
	    '''Return a lowercased copy of text (delegates to the string's lower()).'''
	    lowered = text.lower()
	    return lowered

	def collapse_whitespace(text):
	    '''Replace every run of whitespace in text with a single space.

	    Leading and trailing runs are reduced to one space as well; they are
	    not stripped.
	    '''
	    return _whitespace_re.sub(' ', text)

	def replace_quote(text):
	    '''Return text with each typographic apostrophe (’) swapped for a plain one (').'''
	    curly, straight = '’', "'"
	    # Splitting on the curly quote and re-joining with the straight one
	    # substitutes every occurrence.
	    return straight.join(text.split(curly))

	def remove_special_characters(text):
	    '''Return text with guillemets, the '–' dash, brackets, braces and pipes removed.

	    Each listed sequence is deleted wherever it occurs; nothing is inserted
	    in its place, so surrounding whitespace is left as it was.
	    '''
	    # The pipe used to be listed only as '\|'. That is not an escape Python
	    # recognises, so it meant the two-character string backslash + pipe and
	    # a lone '|' was never removed. The backslash-pipe pair is still removed
	    # (first, so the result for that input is unchanged), then any bare pipe.
	    characters_to_remove = ('«', '»', '–', '[', ']', '{', '}', '\\|', '|')
	    for char in characters_to_remove:
	        text = text.replace(char, '')
	    return text

	def remove_hyphen_at_start(text):
	    '''Drop a single leading '-' and any whitespace that directly follows it.

	    Text that does not begin with '-' is returned unchanged.
	    '''
	    if not text.startswith('-'):
	        return text
	    # Only the first hyphen is cut; whitespace after it is then trimmed.
	    return text[1:].lstrip()

	def basic_cleaners(text):
	    '''Lowercase text, apply a fixed list of grapheme rewrites, then tidy it.

	    Steps, in order: lowercase; apply the ordered substring replacements
	    below; collapse whitespace runs; replace typographic apostrophes;
	    remove special characters; drop a leading hyphen. Returns the cleaned
	    string.
	    '''
	    text = lowercase(text)
	    # Applied strictly in this order: later rules see the output of earlier
	    # ones (e.g. 'qu' becomes 'K' before the 'qw' rule runs).
	    # The former 'Qu' and 'GNGN' rules were dropped: the text is already
	    # lowercase here and no earlier rule produces 'Q', 'G' or 'N', so they
	    # could never match.
	    replacements = (
	        # NOTE(review): the original pair rendered as two identical 'å'
	        # literals; written as explicit escapes on the assumption that it
	        # maps decomposed a + combining ring to precomposed å — confirm.
	        ('\u0061\u030a', '\u00e5'),
	        ('´', "'"),
	        ('à', 'a'),
	        ('qu', 'K'),
	        ('gngn', 'djn'),
	        ('djdj', 'dj'),
	        ('qw', 'kw'),
	        ('emb', 'anb'),
	        ('emp', 'anp'),
	        ('eû', 'eu'),
	        ('au', 'å'),
	        ('t′', 'te'),
	        ('s′', 'sse'),
	        ('aî', 'ai'),
	    )
	    for old, new in replacements:
	        text = text.replace(old, new)
	    # NOTE(review): whitespace is collapsed before special characters are
	    # removed, so deleting e.g. a dash between two spaces can leave two
	    # adjacent spaces in the result — confirm whether that is intended.
	    text = collapse_whitespace(text)
	    text = replace_quote(text)
	    text = remove_special_characters(text)
	    text = remove_hyphen_at_start(text)
	    return text