import re

from text.japanese import japanese_to_romaji_with_accent
from text.mandarin import chinese_to_romaji
from text.english import english_to_ipa2
from text.german import german_to_ipa
from text.croatia_to_ipa import croatian_to_ipa


def cjehd_cleaners(text):
    """Convert language-tagged segments of *text* to phonetic notation.

    Segments are delimited by a matching pair of tags on a single line:
    ``[ZH]...[ZH]``, ``[JA]...[JA]``, ``[EN]...[EN]``, ``[CR]...[CR]`` and
    ``[DE]...[DE]``. Each segment (tags included) is replaced by the output
    of the corresponding converter followed by one space. Text outside the
    tagged segments is left as it is.

    After all replacements the final character of the result is dropped
    (presumably the space appended after the last segment -- note that it
    is removed unconditionally), and a ``'.'`` is appended unless the
    result already ends with one of ``. , ! ? - … ~``.

    Args:
        text: Input string containing zero or more tagged segments.

    Returns:
        The converted string. An input that is empty after dropping its
        final character yields ``''``.
    """
    # All segments are collected from the *original* text before any
    # replacement happens, so converter output is never re-scanned for tags.
    chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
    japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
    croatian_texts = re.findall(r'\[CR\].*?\[CR\]', text)
    english_texts = re.findall(r'\[EN\].*?\[EN\]', text)
    german_texts = re.findall(r'\[DE\].*?\[DE\]', text)

    # In every loop, [4:-4] strips the 4-character opening and closing tags.
    for chinese_text in chinese_texts:
        cleaned_text = chinese_to_romaji(chinese_text[4:-4])
        text = text.replace(chinese_text, cleaned_text + ' ', 1)

    for japanese_text in japanese_texts:
        cleaned_text = (
            japanese_to_romaji_with_accent(japanese_text[4:-4])
            .replace('ts', 'ʦ')
            .replace('u', 'ɯ')
            .replace('...', '…')
        )
        text = text.replace(japanese_text, cleaned_text + ' ', 1)

    for english_text in english_texts:
        cleaned_text = english_to_ipa2(english_text[4:-4])
        text = text.replace(english_text, cleaned_text + ' ', 1)

    for croatian_text in croatian_texts:
        cleaned_text = croatian_to_ipa(croatian_text[4:-4]).replace('ḱ', 'k')
        text = text.replace(croatian_text, cleaned_text + ' ', 1)

    for german_text in german_texts:
        # Strip ellipses and dashes only from the text handed to the
        # converter. The unmodified segment must be used as the search key,
        # otherwise a segment containing '-' or '...' is never found in
        # `text` and its raw markup leaks into the output.
        german_inner = (
            german_text[4:-4]
            .replace('...', '')
            .replace('--', '')
            .replace('-', '')
        )
        cleaned_text = german_to_ipa(german_inner)
        text = text.replace(german_text, cleaned_text + ' ', 1)

    # Drop the final character (see docstring).
    text = text[:-1]
    # Guard against an empty result before indexing the last character.
    if text and re.match(r'[^\.,!\?\-…~]', text[-1]):
        text += '.'
    return text