Commit
·
cc912a2
1
Parent(s):
06b8201
Update text/cleaners.py
Browse files- text/cleaners.py +25 -10
text/cleaners.py
CHANGED
@@ -64,15 +64,30 @@ def cjks_cleaners(text):
|
|
64 |
|
65 |
|
66 |
def cjke_cleaners(text):
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
return text
|
78 |
|
|
|
64 |
|
65 |
|
66 |
def cjke_cleaners(text):
    """Convert language-tagged segments of *text* and tidy the ending.

    Segments wrapped in matching ``[ZH]``, ``[JA]``, ``[KO]`` or ``[EN]``
    tags are located with a non-greedy regex, stripped of their tags,
    passed to the corresponding converter helper (``chinese_to_lazy_ipa``,
    ``japanese_to_ipa``, ``korean_to_ipa``, ``english_to_ipa2`` -- defined
    elsewhere in this module), post-processed with a few symbol
    substitutions, and written back into *text* followed by one space.

    Finally a single trailing padding space is removed and, if the result
    does not already end in one of ``. , ! ? - … ~``, a ``.`` is appended.

    Args:
        text: Input string, possibly containing tagged segments.

    Returns:
        The converted string. An input that is empty (or becomes empty
        after the trailing space is removed) is returned as an empty
        string instead of raising ``IndexError``.
    """
    chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
    japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
    korean_texts = re.findall(r'\[KO\].*?\[KO\]', text)
    english_texts = re.findall(r'\[EN\].*?\[EN\]', text)

    # [4:-4] drops the 4-character opening and closing tags.
    # count=1 on replace keeps repeated identical segments aligned with
    # their own findall() hit.
    for chinese_text in chinese_texts:
        cleaned_text = chinese_to_lazy_ipa(chinese_text[4:-4])
        cleaned_text = cleaned_text.replace(
            'ʧ', 'tʃ').replace('ʦ', 'ts').replace('ɥan', 'ɥæn')
        text = text.replace(chinese_text, cleaned_text + ' ', 1)

    for japanese_text in japanese_texts:
        cleaned_text = japanese_to_ipa(japanese_text[4:-4])
        cleaned_text = cleaned_text.replace('ʧ', 'tʃ').replace(
            'ʦ', 'ts').replace('ɥan', 'ɥæn').replace('ʥ', 'dz')
        text = text.replace(japanese_text, cleaned_text + ' ', 1)

    for korean_text in korean_texts:
        cleaned_text = korean_to_ipa(korean_text[4:-4])
        text = text.replace(korean_text, cleaned_text + ' ', 1)

    for english_text in english_texts:
        cleaned_text = english_to_ipa2(english_text[4:-4])
        cleaned_text = cleaned_text.replace('ɑ', 'a').replace(
            'ɔ', 'o').replace('ɛ', 'e').replace('ɪ', 'i').replace('ʊ', 'u')
        text = text.replace(english_text, cleaned_text + ' ', 1)

    # Only remove the padding space appended after a converted segment;
    # an unconditional text[:-1] would eat a real character whenever the
    # input ends with untagged text.
    if text.endswith(' '):
        text = text[:-1]

    # Nothing left to punctuate; also avoids IndexError on text[-1].
    if not text:
        return text

    if re.match(r'[^\.,!\?\-…~]', text[-1]):
        text += '.'
    return text
|
93 |
|